Diffstat (limited to 'drivers/gpu/drm')
912 files changed, 36410 insertions, 24404 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a3fdc5a68dff..e9500844333e 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
@@ -17,7 +18,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \
 		drm_encoder.o drm_mode_object.o drm_property.o \
 		drm_plane.o drm_color_mgmt.o drm_print.o \
 		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
-		drm_syncobj.o
+		drm_syncobj.o drm_lease.o
 
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
 drm-$(CONFIG_DRM_VM) += drm_vm.o
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 454e6efeb5cf..78d609123420 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
@@ -31,7 +32,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_queue_mgr.o amdgpu_vf_error.o
+	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d161da95fffd..5afaf6016b4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -735,10 +735,14 @@ struct amdgpu_ctx {
 	struct amdgpu_device	*adev;
 	struct amdgpu_queue_mgr	queue_mgr;
 	unsigned		reset_counter;
+	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
-	bool preamble_presented;
+	bool			preamble_presented;
+	enum amd_sched_priority init_priority;
+	enum amd_sched_priority override_priority;
+	struct mutex		lock;
 };
 
 struct amdgpu_ctx_mgr {
@@ -755,13 +759,18 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 			      struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				       struct amdgpu_ring *ring, uint64_t seq);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+				  enum amd_sched_priority priority);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
@@ -773,7 +782,6 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
-	u32			vram_lost_counter;
 };
 
 /*
@@ -874,7 +882,7 @@ struct amdgpu_mec {
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
-	struct mutex		ring_mutex;
+	spinlock_t		ring_lock;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
@@ -1038,6 +1046,10 @@ struct amdgpu_gfx {
 	bool				in_suspend;
 	/* NGG */
 	struct amdgpu_ngg		ngg;
+
+	/* pipe reservation */
+	struct mutex			pipe_reserve_mutex;
+	DECLARE_BITMAP			(pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -1116,6 +1128,7 @@ struct amdgpu_job {
 	uint32_t
gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; + uint32_t vram_lost_counter; /* user fence handling */ uint64_t uf_addr; @@ -1141,7 +1154,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo *wb_obj; @@ -1382,6 +1395,18 @@ struct amdgpu_atcs { }; /* + * Firmware VRAM reservation + */ +struct amdgpu_fw_vram_usage { + u64 start_offset; + u64 size; + struct amdgpu_bo *reserved_bo; + void *va; +}; + +int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev); + +/* * CGS */ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); @@ -1589,6 +1614,8 @@ struct amdgpu_device { struct delayed_work late_init_work; struct amdgpu_virt virt; + /* firmware VRAM reservation */ + struct amdgpu_fw_vram_usage fw_vram_usage; /* link all shadow bo */ struct list_head shadow_list; @@ -1843,8 +1870,6 @@ static inline bool amdgpu_has_atpx(void) { return false; } extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; extern const int amdgpu_max_kms_ioctl; -bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv); int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); void amdgpu_driver_unload_kms(struct drm_device *dev); void amdgpu_driver_lastclose_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index a52795d9b458..c04f44a90392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -35,41 +35,50 @@ #include "acp_gfx_if.h" -#define ACP_TILE_ON_MASK 0x03 -#define ACP_TILE_OFF_MASK 0x02 -#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f -#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20 - -#define ACP_TILE_P1_MASK 0x3e -#define ACP_TILE_P2_MASK 0x3d -#define ACP_TILE_DSP0_MASK 0x3b -#define ACP_TILE_DSP1_MASK 0x37 - -#define ACP_TILE_DSP2_MASK 0x2f - -#define ACP_DMA_REGS_END 0x146c0 -#define ACP_I2S_PLAY_REGS_START 0x14840 -#define ACP_I2S_PLAY_REGS_END 0x148b4 -#define ACP_I2S_CAP_REGS_START 0x148b8 -#define ACP_I2S_CAP_REGS_END 0x1496c - -#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac -#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8 -#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c -#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68 - -#define mmACP_PGFSM_RETAIN_REG 0x51c9 -#define mmACP_PGFSM_CONFIG_REG 0x51ca -#define mmACP_PGFSM_READ_REG_0 0x51cc - -#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8 -#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9 -#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa -#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb - -#define ACP_TIMEOUT_LOOP 0x000000FF -#define ACP_DEVS 3 -#define ACP_SRC_ID 162 +#define ACP_TILE_ON_MASK 0x03 +#define ACP_TILE_OFF_MASK 0x02 +#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f +#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20 + +#define ACP_TILE_P1_MASK 0x3e +#define ACP_TILE_P2_MASK 0x3d +#define ACP_TILE_DSP0_MASK 0x3b +#define ACP_TILE_DSP1_MASK 0x37 + +#define ACP_TILE_DSP2_MASK 0x2f + +#define ACP_DMA_REGS_END 0x146c0 +#define ACP_I2S_PLAY_REGS_START 0x14840 +#define ACP_I2S_PLAY_REGS_END 0x148b4 +#define ACP_I2S_CAP_REGS_START 0x148b8 +#define ACP_I2S_CAP_REGS_END 0x1496c + +#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac +#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8 +#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c +#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68 + +#define 
mmACP_PGFSM_RETAIN_REG 0x51c9 +#define mmACP_PGFSM_CONFIG_REG 0x51ca +#define mmACP_PGFSM_READ_REG_0 0x51cc + +#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8 +#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9 +#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa +#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb + +#define mmACP_CONTROL 0x5131 +#define mmACP_STATUS 0x5133 +#define mmACP_SOFT_RESET 0x5134 +#define ACP_CONTROL__ClkEn_MASK 0x1 +#define ACP_SOFT_RESET__SoftResetAud_MASK 0x100 +#define ACP_SOFT_RESET__SoftResetAudDone_MASK 0x1000000 +#define ACP_CLOCK_EN_TIME_OUT_VALUE 0x000000FF +#define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE 0x000000FF + +#define ACP_TIMEOUT_LOOP 0x000000FF +#define ACP_DEVS 3 +#define ACP_SRC_ID 162 enum { ACP_TILE_P1 = 0, @@ -260,6 +269,8 @@ static int acp_hw_init(void *handle) { int r, i; uint64_t acp_base; + u32 val = 0; + u32 count = 0; struct device *dev; struct i2s_platform_data *i2s_pdata; @@ -371,6 +382,8 @@ static int acp_hw_init(void *handle) adev->acp.acp_cell[0].name = "acp_audio_dma"; adev->acp.acp_cell[0].num_resources = 4; adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); adev->acp.acp_cell[1].name = "designware-i2s"; adev->acp.acp_cell[1].num_resources = 1; @@ -400,6 +413,46 @@ static int acp_hw_init(void *handle) } } + /* Assert Soft reset of ACP */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + + val |= ACP_SOFT_RESET__SoftResetAud_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); + + count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE; + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + if (ACP_SOFT_RESET__SoftResetAudDone_MASK == + (val & ACP_SOFT_RESET__SoftResetAudDone_MASK)) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + udelay(100); + } + /* Enable clock to ACP and wait until the clock is enabled */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL); + val = val | ACP_CONTROL__ClkEn_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val); + + count = ACP_CLOCK_EN_TIME_OUT_VALUE; + + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS); + if (val & (u32) 0x1) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + udelay(100); + } + /* Deassert the SOFT RESET flags */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + val &= ~ACP_SOFT_RESET__SoftResetAud_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); + return 0; } @@ -412,6 +465,8 @@ static int acp_hw_init(void *handle) static int acp_hw_fini(void *handle) { int i, ret; + u32 val = 0; + u32 count = 0; struct device *dev; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -419,6 +474,42 @@ static int acp_hw_fini(void *handle) if (!adev->acp.acp_cell) return 0; + /* Assert Soft reset of ACP */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + + val |= ACP_SOFT_RESET__SoftResetAud_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); + + count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE; + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); + if (ACP_SOFT_RESET__SoftResetAudDone_MASK == + (val & ACP_SOFT_RESET__SoftResetAudDone_MASK)) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + 
udelay(100); + } + /* Disable ACP clock */ + val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL); + val &= ~ACP_CONTROL__ClkEn_MASK; + cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val); + + count = ACP_CLOCK_EN_TIME_OUT_VALUE; + + while (true) { + val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS); + if (val & (u32) 0x1) + break; + if (--count == 0) { + dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); + return -ETIMEDOUT; + } + udelay(100); + } + if (adev->acp.acp_genpd) { for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index ce443586a0c7..f450b69323fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1766,34 +1766,32 @@ bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev) return true; } -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. */ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); @@ -1807,6 +1805,8 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) uint16_t data_offset; int usage_bytes = 0; struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage; + u64 start_addr; + u64 size; if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset); @@ -1815,7 +1815,21 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware), le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb)); - usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; + start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware; + size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb; + + if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == + 
(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { + /* Firmware request VRAM reservation for SR-IOV */ + adev->fw_vram_usage.start_offset = (start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->fw_vram_usage.size = size << 10; + /* Use the default scratch size */ + usage_bytes = 0; + } else { + usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; + } } ctx->scratch_size_bytes = 0; if (usage_bytes == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index f9ffe8ef0cd6..ff8efd0f8fd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -71,19 +71,33 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) struct atom_context *ctx = adev->mode_info.atom_context; int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_usagebyfirmware); + struct vram_usagebyfirmware_v2_1 * firmware_usage; + uint32_t start_addr, size; uint16_t data_offset; int usage_bytes = 0; if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { - struct vram_usagebyfirmware_v2_1 *firmware_usage = - (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); - + firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n", le32_to_cpu(firmware_usage->start_address_in_kb), le16_to_cpu(firmware_usage->used_by_firmware_in_kb), le16_to_cpu(firmware_usage->used_by_driver_in_kb)); - usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) * 1024; + start_addr = le32_to_cpu(firmware_usage->start_address_in_kb); + size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb); + + if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == + (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { + /* Firmware request VRAM reservation for SR-IOV */ + adev->fw_vram_usage.start_offset = (start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->fw_vram_usage.size = size << 10; + /* Use the default scratch size */ + usage_bytes = 0; + } else { + usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10; + } } ctx->scratch_size_bytes = 0; if (usage_bytes == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ab83dfcabb41..6c78623e1386 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -90,12 +90,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_chunk; } + mutex_lock(&p->ctx->lock); + /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; - goto put_ctx; + goto free_chunk; } p->nchunks = cs->in.num_chunks; @@ -103,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) GFP_KERNEL); if (!p->chunks) { ret = -ENOMEM; - goto put_ctx; + goto free_chunk; } for (i = 0; i < p->nchunks; i++) { @@ -170,6 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; + if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + ret = -ECANCELED; + goto free_all_kdata; + } + if (p->uf_entry.robj) p->job->uf_addr = uf_offset; kfree(chunk_array); @@ -183,8 +190,6 @@ 
free_partial_kdata: kfree(p->chunks); p->chunks = NULL; p->nchunks = 0; -put_ctx: - amdgpu_ctx_put(p->ctx); free_chunk: kfree(chunk_array); @@ -557,8 +562,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, * invalidated it. Free it and try again */ release_pages(e->user_pages, - bo->tbo.ttm->num_pages, - false); + bo->tbo.ttm->num_pages); kvfree(e->user_pages); e->user_pages = NULL; } @@ -689,8 +693,7 @@ error_free_pages: continue; release_pages(e->user_pages, - e->robj->tbo.ttm->num_pages, - false); + e->robj->tbo.ttm->num_pages); kvfree(e->user_pages); } } @@ -705,7 +708,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, + amdgpu_bo_explicit_sync(e->robj)); if (r) return r; @@ -736,8 +740,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, dma_fence_put(parser->fence); - if (parser->ctx) + if (parser->ctx) { + mutex_unlock(&parser->ctx->lock); amdgpu_ctx_put(parser->ctx); + } if (parser->bo_list) amdgpu_bo_list_put(parser->bo_list); @@ -844,14 +850,58 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_ring *ring = p->job->ring; - int i, r; + int r; /* Only for UVD/VCE VM emulation */ - if (ring->funcs->parse_cs) { - for (i = 0; i < p->job->num_ibs; i++) { - r = amdgpu_ring_parse_cs(ring, p, i); + if (p->job->ring->funcs->parse_cs) { + unsigned i, j; + + for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + struct amdgpu_bo_va_mapping *m; + struct amdgpu_bo *aobj = NULL; + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; + uint64_t offset; + uint8_t *kptr; + + chunk = &p->chunks[i]; + ib = &p->job->ibs[j]; + chunk_ib = chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, + &aobj, &m); + if (r) { + DRM_ERROR("IB va_start is invalid\n"); + return r; + } + + if ((chunk_ib->va_start + chunk_ib->ib_bytes) > + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + return -EINVAL; + } + + /* the IB should be reserved at this point */ + r = amdgpu_bo_kmap(aobj, (void **)&kptr); + if (r) { + return r; + } + + offset = m->start * AMDGPU_GPU_PAGE_SIZE; + kptr += chunk_ib->va_start - offset; + + memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); + amdgpu_bo_kunmap(aobj); + + r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; + + j++; } } @@ -918,54 +968,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring = ring; - if (ring->funcs->parse_cs) { - struct amdgpu_bo_va_mapping *m; - struct amdgpu_bo *aobj = NULL; - uint64_t offset; - uint8_t *kptr; - - r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, - &aobj, &m); - if (r) { - DRM_ERROR("IB va_start is invalid\n"); - return r; - } - - if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); - return -EINVAL; - } - - /* the IB should be reserved at this point */ - r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { - return r; - } - - offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += chunk_ib->va_start - offset; - - r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib); - if (r) { 
- DRM_ERROR("Failed to get ib !\n"); - return r; - } - - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - } else { - r = amdgpu_ib_get(adev, vm, 0, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - + r = amdgpu_ib_get(adev, vm, + ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, + ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; } ib->gpu_addr = chunk_ib->va_start; ib->length_dw = chunk_ib->ib_bytes / 4; ib->flags = chunk_ib->flags; + j++; } @@ -975,7 +989,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) return -EINVAL; - return 0; + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1176,6 +1190,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); @@ -1189,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; @@ -1197,8 +1212,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; parser.adev = adev; parser.filp = filp; @@ -1209,6 +1222,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } + r = amdgpu_cs_ib_fill(adev, &parser); + if (r) + goto out; + r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) @@ -1219,9 +1236,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } reserved_buffers = true; - r = amdgpu_cs_ib_fill(adev, &parser); - if (r) - goto out; r = amdgpu_cs_dependencies(adev, &parser); if (r) { @@ -1257,16 +1271,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; @@ -1284,6 +1294,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, r = PTR_ERR(fence); else if (fence) { r = dma_fence_wait_timeout(fence, true, timeout); + if (r > 0 && fence->error) + r = fence->error; dma_fence_put(fence); } else r = 1; @@ -1335,16 +1347,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_fence_to_handle *info = data; struct dma_fence *fence; struct drm_syncobj *syncobj; struct sync_file *sync_file; int fd, r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); if (IS_ERR(fence)) return PTR_ERR(fence); @@ -1425,6 +1433,9 @@ static int amdgpu_cs_wait_all_fences(struct 
amdgpu_device *adev, if (r == 0) break; + + if (fence->error) + return fence->error; } memset(wait, 0, sizeof(*wait)); @@ -1485,7 +1496,7 @@ out: wait->out.status = (r > 0); wait->out.first_signaled = first; /* set return value 0 to indicate success */ - r = 0; + r = array[first]->error; err_free_fence_array: for (i = 0; i < fence_count; i++) @@ -1506,15 +1517,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_wait_fences *wait = data; uint32_t fence_count = wait->in.fence_count; struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; /* Get the fences from userspace */ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), GFP_KERNEL); @@ -1572,14 +1580,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) return -EINVAL; - r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); - if (unlikely(r)) - return r; - - if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) - return 0; + if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, + false); + if (r) + return r; + } - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); - return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false); + return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 75c933b1a432..c184468e2b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -23,13 +23,41 @@ */ #include <drm/drmP.h> +#include <drm/drm_auth.h> #include "amdgpu.h" +#include "amdgpu_sched.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(&ctx->refcount); @@ -39,19 +67,24 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) if (!ctx->fences) return -ENOMEM; + mutex_init(&ctx->lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; } ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = 
&ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = &ring->sched.sched_rq[priority]; if (ring == &adev->gfx.kiq.ring) continue; @@ -96,10 +129,14 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) &ctx->rings[i].entity); amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); + + mutex_destroy(&ctx->lock); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + struct drm_file *filp, + enum amd_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; @@ -117,8 +154,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, kfree(ctx); return r; } + *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, ctx); + r = amdgpu_ctx_init(adev, priority, filp, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -193,6 +231,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, { int r; uint32_t id; + enum amd_sched_priority priority; union drm_amdgpu_ctx *args = data; struct amdgpu_device *adev = dev->dev_private; @@ -200,10 +239,16 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, r = 0; id = args->in.ctx_id; + priority = amdgpu_to_sched_priority(args->in.priority); + + /* For backwards compatibility reasons, we need to accept + * ioctls with garbage in the priority field */ + if (priority == AMD_SCHED_PRIORITY_INVALID) + priority = AMD_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: - r = amdgpu_ctx_alloc(adev, fpriv, &id); + r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: @@ -256,12 +301,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, idx = seq & (amdgpu_sched_jobs - 1); other = cring->fences[idx]; - if (other) { - signed long r; - r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - } + if (other) + BUG_ON(!dma_fence_is_signaled(other)); dma_fence_get(fence); @@ -305,6 +346,51 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, return fence; } +void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, + enum amd_sched_priority priority) +{ + int i; + struct amdgpu_device *adev = ctx->adev; + struct amd_sched_rq *rq; + struct amd_sched_entity *entity; + struct amdgpu_ring *ring; + enum amd_sched_priority ctx_prio; + + ctx->override_priority = priority; + + ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ? 
+ ctx->init_priority : ctx->override_priority; + + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + entity = &ctx->rings[i].entity; + rq = &ring->sched.sched_rq[ctx_prio]; + + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + continue; + + amd_sched_entity_set_rq(entity, rq); + } +} + +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) +{ + struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id]; + unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1); + struct dma_fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; + } + } + + return 0; +} + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) { mutex_init(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f5543758667f..2d792cdc094c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -110,10 +110,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { - BUG_ON(in_interrupt()); + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_virt_kiq_rreg(adev, reg); - } if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -138,10 +136,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, adev->last_mm_index = v; } - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { - BUG_ON(in_interrupt()); + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_virt_kiq_wreg(adev, reg, v); - } if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -551,7 +547,7 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); - *wb = offset * 8; /* convert to dw offset */ + *wb = offset << 3; /* convert to dw offset */ return 0; } else { return -EINVAL; @@ -569,7 +565,7 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) { if (wb < adev->wb.num_wb) - __clear_bit(wb, adev->wb.used); + __clear_bit(wb >> 3, adev->wb.used); } /** @@ -659,42 +655,96 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) } /* - * GPU helpers function. + * Firmware Reservation functions */ /** - * amdgpu_need_post - check if the hw need post or not + * amdgpu_fw_reserve_vram_fini - free fw reserved vram * * @adev: amdgpu_device pointer * - * Check if the asic has been initialized (all asics) at driver startup - * or post is needed if hw reset is performed. - * Returns true if need or false if not. + * free fw reserved vram if it has been reserved. */ -bool amdgpu_need_post(struct amdgpu_device *adev) +void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev) { - uint32_t reg; + amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, + NULL, &adev->fw_vram_usage.va); +} - if (adev->has_hw_reset) { - adev->has_hw_reset = false; - return true; - } +/** + * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from fw. 
+ */ +int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev) +{ + int r = 0; + u64 gpu_addr; + u64 vram_size = adev->mc.visible_vram_size; - /* bios scratch used on CIK+ */ - if (adev->asic_type >= CHIP_BONAIRE) - return amdgpu_atombios_scratch_need_asic_init(adev); + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; - /* check MEM_SIZE for older asics */ - reg = amdgpu_asic_get_config_memsize(adev); + if (adev->fw_vram_usage.size > 0 && + adev->fw_vram_usage.size <= vram_size) { - if ((reg != 0) && (reg != 0xffffffff)) - return false; + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, + PAGE_SIZE, true, 0, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, + &adev->fw_vram_usage.reserved_bo); + if (r) + goto error_create; - return true; + r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); + if (r) + goto error_reserve; + r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, + AMDGPU_GEM_DOMAIN_VRAM, + adev->fw_vram_usage.start_offset, + (adev->fw_vram_usage.start_offset + + adev->fw_vram_usage.size), &gpu_addr); + if (r) + goto error_pin; + r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); + if (r) + goto error_kmap; + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + } + return r; + +error_kmap: + amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); +error_pin: + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +error_reserve: + amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); +error_create: + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + return r; } -static bool amdgpu_vpost_needed(struct amdgpu_device *adev) + +/* + * GPU helpers function. + */ +/** + * amdgpu_need_post - check if the hw need post or not + * + * @adev: amdgpu_device pointer + * + * Check if the asic has been initialized (all asics) at driver startup + * or post is needed if hw reset is performed. + * Returns true if need or false if not. 
+ */ +bool amdgpu_need_post(struct amdgpu_device *adev) { + uint32_t reg; + if (amdgpu_sriov_vf(adev)) return false; @@ -717,7 +767,23 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return true; } } - return amdgpu_need_post(adev); + + if (adev->has_hw_reset) { + adev->has_hw_reset = false; + return true; + } + + /* bios scratch used on CIK+ */ + if (adev->asic_type >= CHIP_BONAIRE) + return amdgpu_atombios_scratch_need_asic_init(adev); + + /* check MEM_SIZE for older asics */ + reg = amdgpu_asic_get_config_memsize(adev); + + if ((reg != 0) && (reg != 0xffffffff)) + return false; + + return true; } /** @@ -1605,7 +1671,6 @@ static int amdgpu_init(struct amdgpu_device *adev) return r; } adev->ip_blocks[i].status.sw = true; - /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { r = amdgpu_vram_scratch_init(adev); @@ -1636,11 +1701,6 @@ static int amdgpu_init(struct amdgpu_device *adev) } } - mutex_lock(&adev->firmware.mutex); - if (amdgpu_ucode_init_bo(adev)) - adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; - mutex_unlock(&adev->firmware.mutex); - for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.sw) continue; @@ -1776,8 +1836,6 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - amdgpu_ucode_fini_bo(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) @@ -1889,6 +1947,7 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) static enum amd_ip_block_type ip_order[] = { AMD_IP_BLOCK_TYPE_SMC, + AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_DCE, AMD_IP_BLOCK_TYPE_GFX, AMD_IP_BLOCK_TYPE_SDMA, @@ -1974,12 +2033,17 @@ static int amdgpu_resume(struct amdgpu_device *adev) static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { - if (adev->is_atom_fw) { - if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; - } else { - if (amdgpu_atombios_has_gpu_virtualization_table(adev)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + if (amdgpu_sriov_vf(adev)) { + if (adev->is_atom_fw) { + if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } else { + if (amdgpu_atombios_has_gpu_virtualization_table(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } + + if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); } } @@ -2066,6 +2130,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = &amdgpu_invalid_rreg; adev->smc_wreg = &amdgpu_invalid_wreg; @@ -2093,6 +2158,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); + mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); @@ -2189,10 +2255,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_detect_sriov_bios(adev); /* Post card if necessary */ - if (amdgpu_vpost_needed(adev)) { + if (amdgpu_need_post(adev)) { if (!adev->bios) { dev_err(adev->dev, "no vBIOS found\n"); 
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); r = -EINVAL; goto failed; } @@ -2200,7 +2265,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_atom_asic_init(adev->mode_info.atom_context); if (r) { dev_err(adev->dev, "gpu post error!\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0); goto failed; } } else { @@ -2270,6 +2334,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("ib ring test failed (%d).\n", r); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_init_data_exchange(adev); + amdgpu_fbdev_init(adev); r = amdgpu_pm_sysfs_init(adev); @@ -2347,6 +2414,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); + amdgpu_fw_reserve_vram_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); @@ -2617,6 +2685,9 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) int i; bool asic_hang = false; + if (amdgpu_sriov_vf(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -3020,7 +3091,6 @@ out: } } else { dev_err(adev->dev, "asic resume failed (%d).\n", r); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { if (adev->rings[i] && adev->rings[i]->sched.thread) { kthread_unpark(adev->rings[i]->sched.thread); @@ -3038,7 +3108,6 @@ out: if (r) { /* bad news, how to tell it to userspace ? */ dev_info(adev->dev, "GPU reset failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(adev->dev, "GPU reset successed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 715ede92ca8a..ec96bb1f9eaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -71,9 +71,11 @@ * - 3.19.0 - Add support for UVD MJPEG decode * - 3.20.0 - Add support for local BOs * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl + * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl + * - 3.23.0 - Add query for VRAM lost counter */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 21 +#define KMS_DRIVER_MINOR 23 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 333bad749067..bd5b8065c32e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) } /** + * amdgpu_fence_emit_polling - emit a fence on the requeste ring + * + * @ring: ring the fence is associated with + * @s: resulting sequence number + * + * Emits a fence command on the requested ring (all asics). + * Used For polling fence. + * Returns 0 on success, -ENOMEM on failure. 
+ */ +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) +{ + uint32_t seq; + + if (!s) + return -EINVAL; + + seq = ++ring->fence_drv.sync_seq; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + seq, AMDGPU_FENCE_FLAG_INT); + + *s = seq; + + return 0; +} + +/** * amdgpu_fence_schedule_fallback - schedule fallback check * * @ring: pointer to struct amdgpu_ring @@ -260,7 +286,7 @@ static void amdgpu_fence_fallback(unsigned long arg) */ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) { - uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq); + uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq); struct dma_fence *fence, **ptr; int r; @@ -282,6 +308,30 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) } /** + * amdgpu_fence_wait_polling - busy wait for givn sequence number + * + * @ring: ring index the fence is associated with + * @wait_seq: sequence number to wait + * @timeout: the timeout for waiting in usecs + * + * Wait for all fences on the requested ring to signal (all asics). + * Returns left time if no timeout, 0 or minus if timeout. + */ +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, + uint32_t wait_seq, + signed long timeout) +{ + uint32_t seq; + + do { + seq = amdgpu_fence_read(ring); + udelay(5); + timeout -= 5; + } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0); + + return timeout > 0 ? timeout : 0; +} +/** * amdgpu_fence_count_emitted - get the count of emitted fences * * @ring: ring the fence is associated with @@ -300,7 +350,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) amdgpu_fence_process(ring); emitted = 0x100000000ull; emitted -= atomic_read(&ring->fence_drv.last_seq); - emitted += ACCESS_ONCE(ring->fence_drv.sync_seq); + emitted += READ_ONCE(ring->fence_drv.sync_seq); return lower_32_bits(emitted); } @@ -641,6 +691,19 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) atomic_read(&ring->fence_drv.last_seq)); seq_printf(m, "Last emitted 0x%08x\n", ring->fence_drv.sync_seq); + + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) + continue; + + /* set in CP_VMID_PREEMPT and preemption occurred */ + seq_printf(m, "Last preempted 0x%08x\n", + le32_to_cpu(*(ring->fence_drv.cpu_addr + 2))); + /* set in CP_VMID_RESET and reset occurred */ + seq_printf(m, "Last reset 0x%08x\n", + le32_to_cpu(*(ring->fence_drv.cpu_addr + 4))); + /* Both preemption and reset occurred */ + seq_printf(m, "Last both 0x%08x\n", + le32_to_cpu(*(ring->fence_drv.cpu_addr + 6))); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index f4370081f6e6..fe818501c520 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -332,12 +332,13 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, adev->gart.pages[p] = pagelist[i]; #endif - if (adev->gart.ptr) { - r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, - adev->gart.ptr); - if (r) - return r; - } + if (!adev->gart.ptr) + return 0; + + r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, + adev->gart.ptr); + if (r) + return r; mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8e6bb3..a418df1b9422 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 
AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ @@ -344,7 +346,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, return 0; free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false); + release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); unlock_mmap_sem: up_read(¤t->mm->mmap_sem); @@ -577,11 +579,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->operation); return -EINVAL; } - if ((args->operation == AMDGPU_VA_OP_MAP) || - (args->operation == AMDGPU_VA_OP_REPLACE)) { - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - } INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&duplicates); @@ -789,11 +786,11 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) seq_printf(m, "\t0x%08x: %12ld byte %s", id, amdgpu_bo_size(bo), placement); - offset = ACCESS_ONCE(bo->tbo.mem.start); + offset = READ_ONCE(bo->tbo.mem.start); if (offset != AMDGPU_BO_INVALID_OFFSET) seq_printf(m, " @ 0x%010Lx", offset); - pin_count = ACCESS_ONCE(bo->pin_count); + pin_count = READ_ONCE(bo->pin_count); if (pin_count) seq_printf(m, " pin count %d", pin_count); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 83435ccbad44..ef043361009f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -201,7 +201,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_kiq *kiq = &adev->gfx.kiq; int r = 0; - mutex_init(&kiq->ring_mutex); + spin_lock_init(&kiq->ring_lock); r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 0d15eb7d31d7..33535d347734 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -169,7 +169,8 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, int r; spin_lock(&mgr->lock); - if (atomic64_read(&mgr->available) < mem->num_pages) { + if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) && + atomic64_read(&mgr->available) < mem->num_pages) { spin_unlock(&mgr->lock); return 0; } @@ -244,8 +245,9 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) { struct amdgpu_gtt_mgr *mgr = man->priv; + s64 result = man->size - atomic64_read(&mgr->available); - return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE; + return (result > 0 ? 
result : 0) * PAGE_SIZE; } /** @@ -265,7 +267,7 @@ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); - drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", + drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n", man->size, (u64)atomic64_read(&mgr->available), amdgpu_gtt_mgr_usage(man) >> 20); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4510627ae83e..0cfc68db575b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -65,6 +65,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->dep_sync); amdgpu_sync_create(&(*job)->sched_sync); + (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); return 0; } @@ -103,6 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->dep_sync); @@ -139,6 +141,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); amd_sched_entity_push_job(&job->base); return 0; @@ -177,8 +181,8 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; + struct amdgpu_device *adev; struct amdgpu_job *job; - struct amdgpu_fpriv *fpriv = NULL; int r; if (!sched_job) { @@ -186,23 +190,25 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); + adev = job->adev; BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - if (job->vm) - fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); /* skip ib schedule when vram is lost */ - if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { + dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); DRM_ERROR("Skip scheduling IBs!\n"); - else { - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); + } else { + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, + &fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); } /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); + amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b83ef4f204b3..720139e182a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include <drm/amdgpu_drm.h> +#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -269,7 +270,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = 
filp->driver_priv; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user *)(uintptr_t)info->return_pointer; @@ -282,8 +282,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: @@ -765,6 +763,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; } + case AMDGPU_INFO_VRAM_LOST_COUNTER: + ui32 = atomic_read(&adev->vram_lost_counter); + return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -791,12 +792,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } -bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv) -{ - return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); -} - /** * amdgpu_driver_open_kms - drm callback for open * @@ -853,7 +848,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); - fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); file_priv->driver_priv = fpriv; out_suspend: @@ -1023,6 +1017,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6982baeccd14..ea25164e7f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,9 +40,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *bo; - - bo = container_of(tbo, struct amdgpu_bo, tbo); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); amdgpu_bo_kunmap(bo); @@ -371,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r != 0)) + return r; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && @@ -380,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); - if (unlikely(r != 0)) - return r; - if (kernel) bo->tbo.priority = 1; @@ -884,7 +882,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); amdgpu_vm_bo_invalidate(adev, abo, evict); amdgpu_bo_kunmap(abo); @@ -911,7 +909,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) 
return 0; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 39b6bf6fb051..428aae048f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -94,6 +94,11 @@ struct amdgpu_bo { }; }; +static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) +{ + return container_of(tbo, struct amdgpu_bo, tbo); +} + /** * amdgpu_mem_type_to_domain - return domain corresponding to mem_type * @mem_type: ttm memory type @@ -188,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 3b42f407971d..5f5aa5fddc16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -145,6 +145,8 @@ static int amdgpu_pp_hw_init(void *handle) int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_init_bo(adev); if (adev->powerplay.ip_funcs->hw_init) ret = adev->powerplay.ip_funcs->hw_init( @@ -162,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_fini_bo(adev); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index f1035a689d35..447d446b5015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -411,6 +411,13 @@ static int psp_hw_init(void *handle) return 0; mutex_lock(&adev->firmware.mutex); + /* + * This sequence is just used on hw_init only once, no need on + * resume. 
+ */ + ret = amdgpu_ucode_init_bo(adev); + if (ret) + goto failed; ret = psp_load_fw(adev); if (ret) { @@ -435,6 +442,8 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + amdgpu_ucode_fini_bo(adev); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 019932a7ea3a..a98fbbb4739f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -136,7 +136,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); - amdgpu_ring_lru_touch(ring->adev, ring); + if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) + amdgpu_ring_lru_touch(ring->adev, ring); } /** @@ -155,6 +156,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } /** + * amdgpu_ring_priority_put - restore a ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Release a request for executing at @priority + */ +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + int i; + + if (!ring->funcs->set_priority) + return; + + if (atomic_dec_return(&ring->num_jobs[priority]) > 0) + return; + + /* no need to restore if the job is already at the lowest priority */ + if (priority == AMD_SCHED_PRIORITY_NORMAL) + return; + + mutex_lock(&ring->priority_mutex); + /* something higher prio is executing, no need to decay */ + if (ring->priority > priority) + goto out_unlock; + + /* decay priority to the next level with a job available */ + for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { + if (i == AMD_SCHED_PRIORITY_NORMAL + || atomic_read(&ring->num_jobs[i])) { + ring->priority = i; + ring->funcs->set_priority(ring, i); + break; + } + } + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + +/** + * amdgpu_ring_priority_get - change the ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Request a ring's priority to be raised to @priority (refcounted). + */ +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + if (!ring->funcs->set_priority) + return; + + atomic_inc(&ring->num_jobs[priority]); + + mutex_lock(&ring->priority_mutex); + if (priority <= ring->priority) + goto out_unlock; + + ring->priority = priority; + ring->funcs->set_priority(ring, priority); + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + +/** * amdgpu_ring_init - init driver ring struct. 
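amdgpu_ring_priority_get()/amdgpu_ring_priority_put() above form a per-ring, per-level refcount: the ring runs at the highest priority level that still has outstanding jobs, and put() only decays it once the counter for that level drops to zero, falling back to the next busy level or NORMAL. A minimal illustrative sketch of how a submitter would bracket a single job; this is a hypothetical caller, not the actual job path:

    /* Illustrative only: raise the ring for the lifetime of one job and
     * release it once the job's fence has signalled.  Nested gets just
     * bump the per-level counter, so overlapping jobs are safe.
     */
    static void example_run_high_prio_job(struct amdgpu_ring *ring)
    {
            amdgpu_ring_priority_get(ring, AMD_SCHED_PRIORITY_HIGH_HW);

            /* ... build, emit and commit the job, wait for its fence ... */

            amdgpu_ring_priority_put(ring, AMD_SCHED_PRIORITY_HIGH_HW);
    }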
* * @adev: amdgpu_device pointer @@ -169,7 +239,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned max_dw, struct amdgpu_irq_src *irq_src, unsigned irq_type) { - int r; + int r, i; int sched_hw_submission = amdgpu_sched_hw_submission; /* Set the hw submission limit higher for KIQ because @@ -247,9 +317,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; + ring->priority = AMD_SCHED_PRIORITY_NORMAL; + mutex_init(&ring->priority_mutex); INIT_LIST_HEAD(&ring->lru_list); amdgpu_ring_lru_touch(adev, ring); + for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i) + atomic_set(&ring->num_jobs[i], 0); + if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 491bd5512dcc..b18c2b96691f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -24,6 +24,7 @@ #ifndef __AMDGPU_RING_H__ #define __AMDGPU_RING_H__ +#include <drm/amdgpu_drm.h> #include "gpu_scheduler.h" /* max number of rings */ @@ -56,6 +57,7 @@ struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; struct amdgpu_cs_parser; +struct amdgpu_job; /* * Fences. @@ -88,8 +90,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, + uint32_t wait_seq, + signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); /* @@ -147,6 +153,9 @@ struct amdgpu_ring_funcs { void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + /* priority functions */ + void (*set_priority) (struct amdgpu_ring *ring, + enum amd_sched_priority priority); }; struct amdgpu_ring { @@ -187,6 +196,12 @@ struct amdgpu_ring { volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; bool has_compute_vm_bug; + + atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX]; + struct mutex priority_mutex; + /* protected by priority_mutex */ + int priority; + #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif @@ -197,6 +212,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority); +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000000000000..290cc3f9c433 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Andres Rodriguez <andresx7@gmail.com> + */ + +#include <linux/fdtable.h> +#include <linux/pid.h> +#include <drm/amdgpu_drm.h> +#include "amdgpu.h" + +#include "amdgpu_vm.h" + +enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +{ + switch (amdgpu_priority) { + case AMDGPU_CTX_PRIORITY_VERY_HIGH: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH: + return AMD_SCHED_PRIORITY_HIGH_SW; + case AMDGPU_CTX_PRIORITY_NORMAL: + return AMD_SCHED_PRIORITY_NORMAL; + case AMDGPU_CTX_PRIORITY_LOW: + case AMDGPU_CTX_PRIORITY_VERY_LOW: + return AMD_SCHED_PRIORITY_LOW; + case AMDGPU_CTX_PRIORITY_UNSET: + return AMD_SCHED_PRIORITY_UNSET; + default: + WARN(1, "Invalid context priority %d\n", amdgpu_priority); + return AMD_SCHED_PRIORITY_INVALID; + } +} + +static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, + int fd, + enum amd_sched_priority priority) +{ + struct file *filp = fcheck(fd); + struct drm_file *file; + struct pid *pid; + struct amdgpu_fpriv *fpriv; + struct amdgpu_ctx *ctx; + uint32_t id; + + if (!filp) + return -EINVAL; + + pid = get_pid(((struct drm_file *)filp->private_data)->pid); + + mutex_lock(&adev->ddev->filelist_mutex); + list_for_each_entry(file, &adev->ddev->filelist, lhead) { + if (file->pid != pid) + continue; + + fpriv = file->driver_priv; + idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) + amdgpu_ctx_priority_override(ctx, priority); + } + mutex_unlock(&adev->ddev->filelist_mutex); + + put_pid(pid); + + return 0; +} + +int amdgpu_sched_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_sched *args = data; + struct amdgpu_device *adev = dev->dev_private; + enum amd_sched_priority priority; + int r; + + priority = amdgpu_to_sched_priority(args->in.priority); + if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID) + return -EINVAL; + + switch (args->in.op) { + case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE: + r = amdgpu_sched_process_priority_override(adev, + args->in.fd, + priority); + break; + default: + DRM_ERROR("Invalid sched op specified: %d\n", args->in.op); + r = -EINVAL; + break; + } + + return r; +} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h index 13c8dbbccaf2..b28c067d3822 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h @@ -1,5 +1,5 @@ /* - * Copyright 2015 Advanced Micro Devices, 
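Because the new AMDGPU_SCHED ioctl is registered with DRM_MASTER (see the ioctl table change earlier in this diff), only a master holder such as the compositor can override another process's context priorities; the target process is named by one of its DRM file descriptors and every context in that file's ctx_mgr receives the override. A rough userspace sketch, assuming libdrm's drmCommandWrite() and the uapi names from amdgpu_drm.h; master_fd and client_fd are placeholders:

    /* fragment; assumes <xf86drm.h>, <amdgpu_drm.h> and <stdio.h> */
    union drm_amdgpu_sched args = { 0 };
    int r;

    args.in.op       = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
    args.in.fd       = client_fd;                 /* a DRM fd belonging to the client */
    args.in.priority = AMDGPU_CTX_PRIORITY_HIGH;  /* maps to AMD_SCHED_PRIORITY_HIGH_SW */
    /* args.in.flags stays 0 -- the ioctl rejects any other value */

    r = drmCommandWrite(master_fd, DRM_AMDGPU_SCHED, &args, sizeof(args));
    if (r)
            fprintf(stderr, "AMDGPU_SCHED priority override failed: %d\n", r);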
Inc. + * Copyright 2017 Valve Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,22 +19,16 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * + * Authors: Andres Rodriguez <andresx7@gmail.com> */ -#ifndef _ICELAND_SMC_H -#define _ICELAND_SMC_H -#include "smumgr.h" +#ifndef __AMDGPU_SCHED_H__ +#define __AMDGPU_SCHED_H__ +#include <drm/drmP.h> -int iceland_populate_all_graphic_levels(struct pp_hwmgr *hwmgr); -int iceland_populate_all_memory_levels(struct pp_hwmgr *hwmgr); -int iceland_init_smc_table(struct pp_hwmgr *hwmgr); -int iceland_thermal_setup_fan_table(struct pp_hwmgr *hwmgr); -int iceland_update_sclk_threshold(struct pp_hwmgr *hwmgr); -uint32_t iceland_get_offsetof(uint32_t type, uint32_t member); -uint32_t iceland_get_mac_definition(uint32_t value); -int iceland_process_firmware_header(struct pp_hwmgr *hwmgr); -int iceland_initialize_mc_reg_table(struct pp_hwmgr *hwmgr); -bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr); -#endif +enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); +int amdgpu_sched_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +#endif // __AMDGPU_SCHED_H__ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44312f9..a4bf21f8f1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner) + void *owner, bool explicit_sync) { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); + if (explicit_sync) + return r; + flist = reservation_object_get_list(resv); if (!flist || r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc7687993317..70d7e3a279a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner); + void *owner, + bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 213988f336ed..f337c316ec2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_AMDGPU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _AMDGPU_TRACE_H_ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c index 
9ec96b9e85d1..b160b958e5fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright Red Hat Inc 2010. * * Permission is hereby granted, free of charge, to any person obtaining a diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 15a28578d458..ad5bf86ee8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -44,6 +44,7 @@ #include <linux/debugfs.h> #include <linux/iommu.h> #include "amdgpu.h" +#include "amdgpu_object.h" #include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" @@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (adev->mman.buffer_funcs && @@ -257,7 +258,7 @@ gtt: static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; @@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, return addr; } -static int amdgpu_move_blit(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, - struct ttm_mem_reg *new_mem, - struct ttm_mem_reg *old_mem) +/** + * amdgpu_find_mm_node - Helper function finds the drm_mm_node + * corresponding to @offset. It also modifies the offset to be + * within the drm_mm_node returned + */ +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, + unsigned long *offset) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct drm_mm_node *mm_node = mem->mm_node; - struct drm_mm_node *old_mm, *new_mm; - uint64_t old_start, old_size, new_start, new_size; - unsigned long num_pages; - struct dma_fence *fence = NULL; - int r; + while (*offset >= (mm_node->size << PAGE_SHIFT)) { + *offset -= (mm_node->size << PAGE_SHIFT); + ++mm_node; + } + return mm_node; +} - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); +/** + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * + * The function copies @size bytes from {src->mem + src->offset} to + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a + * move and different for a BO to BO copy. + * + * @f: Returns the last fence if multiple jobs are submitted. 
+ */ +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct drm_mm_node *src_mm, *dst_mm; + uint64_t src_node_start, dst_node_start, src_node_size, + dst_node_size, src_page_offset, dst_page_offset; + struct dma_fence *fence = NULL; + int r = 0; + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE); if (!ring->ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } - old_mm = old_mem->mm_node; - old_size = old_mm->size; - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); + src_mm = amdgpu_find_mm_node(src->mem, &src->offset); + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + + src->offset; + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; + src_page_offset = src_node_start & (PAGE_SIZE - 1); - new_mm = new_mem->mm_node; - new_size = new_mm->size; - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + + dst->offset; + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); - num_pages = new_mem->num_pages; mutex_lock(&adev->mman.gtt_window_lock); - while (num_pages) { - unsigned long cur_pages = min(min(old_size, new_size), - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); - uint64_t from = old_start, to = new_start; + + while (size) { + unsigned long cur_size; + uint64_t from = src_node_start, to = dst_node_start; struct dma_fence *next; - if (old_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(old_mem)) { - r = amdgpu_map_buffer(bo, old_mem, cur_pages, - old_start, 0, ring, &from); + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst + * begins at an offset, then adjust the size accordingly + */ + cur_size = min3(min(src_node_size, dst_node_size), size, + GTT_MAX_BYTES); + if (cur_size + src_page_offset > GTT_MAX_BYTES || + cur_size + dst_page_offset > GTT_MAX_BYTES) + cur_size -= max(src_page_offset, dst_page_offset); + + /* Map only what needs to be accessed. 
Map src to window 0 and + * dst to window 1 + */ + if (src->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(src->mem)) { + r = amdgpu_map_buffer(src->bo, src->mem, + PFN_UP(cur_size + src_page_offset), + src_node_start, 0, ring, + &from); if (r) goto error; + /* Adjust the offset because amdgpu_map_buffer returns + * start of mapped page + */ + from += src_page_offset; } - if (new_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(new_mem)) { - r = amdgpu_map_buffer(bo, new_mem, cur_pages, - new_start, 1, ring, &to); + if (dst->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(dst->mem)) { + r = amdgpu_map_buffer(dst->bo, dst->mem, + PFN_UP(cur_size + dst_page_offset), + dst_node_start, 1, ring, + &to); if (r) goto error; + to += dst_page_offset; } - r = amdgpu_copy_buffer(ring, from, to, - cur_pages * PAGE_SIZE, - bo->resv, &next, false, true); + r = amdgpu_copy_buffer(ring, from, to, cur_size, + resv, &next, false, true); if (r) goto error; dma_fence_put(fence); fence = next; - num_pages -= cur_pages; - if (!num_pages) + size -= cur_size; + if (!size) break; - old_size -= cur_pages; - if (!old_size) { - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); - old_size = old_mm->size; + src_node_size -= cur_size; + if (!src_node_size) { + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, + src->mem); + src_node_size = (src_mm->size << PAGE_SHIFT); } else { - old_start += cur_pages * PAGE_SIZE; + src_node_start += cur_size; + src_page_offset = src_node_start & (PAGE_SIZE - 1); } - - new_size -= cur_pages; - if (!new_size) { - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); - new_size = new_mm->size; + dst_node_size -= cur_size; + if (!dst_node_size) { + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, + dst->mem); + dst_node_size = (dst_mm->size << PAGE_SHIFT); } else { - new_start += cur_pages * PAGE_SIZE; + dst_node_start += cur_size; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); } } +error: mutex_unlock(&adev->mman.gtt_window_lock); + if (f) + *f = dma_fence_get(fence); + dma_fence_put(fence); + return r; +} + + +static int amdgpu_move_blit(struct ttm_buffer_object *bo, + bool evict, bool no_wait_gpu, + struct ttm_mem_reg *new_mem, + struct ttm_mem_reg *old_mem) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_copy_mem src, dst; + struct dma_fence *fence = NULL; + int r; + + src.bo = bo; + dst.bo = bo; + src.mem = old_mem; + dst.mem = new_mem; + src.offset = 0; + dst.offset = 0; + + r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, + new_mem->num_pages << PAGE_SHIFT, + bo->resv, &fence); + if (r) + goto error; r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); dma_fence_put(fence); return r; error: - mutex_unlock(&adev->mman.gtt_window_lock); - if (fence) dma_fence_wait(fence, false); dma_fence_put(fence); @@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, int r; /* Can't move a pinned BO */ - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); if (WARN_ON_ONCE(abo->pin_count > 0)) return -EINVAL; @@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { - struct drm_mm_node *mm = bo->mem.mm_node; - uint64_t size = mm->size; - uint64_t offset = page_offset; + struct drm_mm_node *mm; + unsigned long offset = (page_offset << PAGE_SHIFT); - page_offset = do_div(offset, size); - mm += offset; - return 
(bo->mem.bus.base >> PAGE_SHIFT) + mm->start + + (offset >> PAGE_SHIFT); } /* @@ -666,7 +746,7 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) return 0; release_pages: - release_pages(pages, pinned, 0); + release_pages(pages, pinned); up_read(&current->mm->mmap_sem); return r; } @@ -829,7 +909,8 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) placement.busy_placement = &placements; placements.fpfn = 0; placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; - placements.flags = bo->mem.placement | TTM_PL_FLAG_TT; + placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | + TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); if (unlikely(r)) @@ -1112,9 +1193,6 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; - if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) - return ttm_bo_eviction_valuable(bo, place); - switch (bo->mem.mem_type) { case TTM_PL_TT: return true; @@ -1129,7 +1207,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, num_pages -= node->size; ++node; } - break; + return false; default: break; @@ -1142,9 +1220,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); - struct drm_mm_node *nodes = abo->tbo.mem.mm_node; + struct drm_mm_node *nodes; uint32_t value = 0; int ret = 0; uint64_t pos; @@ -1153,10 +1231,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, if (bo->mem.mem_type != TTM_PL_VRAM) return -EIO; - while (offset >= (nodes->size << PAGE_SHIFT)) { - offset -= nodes->size << PAGE_SHIFT; - ++nodes; - } + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); pos = (nodes->start << PAGE_SHIFT) + offset; while (len && pos < adev->mc.mc_vram_size) { @@ -1255,6 +1330,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Change the size here instead of the init above so only lpfn is affected */ amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + /* + *The reserved vram for firmware must be pinned to the specified + *place on the VRAM, so reserve it early.
+ */ + r = amdgpu_fw_reserve_vram_init(adev); + if (r) { + return r; + } + r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, @@ -1479,7 +1563,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, job->vm_needs_flush = vm_needs_flush; if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; @@ -1571,7 +1656,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 7abae6867339..abd4084982a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -58,6 +58,12 @@ struct amdgpu_mman { struct amd_sched_entity entity; }; +struct amdgpu_copy_mem { + struct ttm_buffer_object *bo; + struct ttm_mem_reg *mem; + unsigned long offset; +}; + extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct reservation_object *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint64_t src_data, struct reservation_object *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b46280c1279f..2918de2f39ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -648,7 +648,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) uint32_t allocated = 0; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - int i, r, idx = 0; + int i, r = 0, idx = 0; p->job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c index 746b81339835..7f7097931c6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c @@ -31,7 +31,12 @@ void amdgpu_vf_error_put(struct amdgpu_device *adev, uint64_t error_data) { int index; - uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code); + uint16_t error_code; + + if (!amdgpu_sriov_vf(adev)) + return; + + error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code); mutex_lock(&adev->virt.vf_errors.lock); index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ab05121b9272..6738df836a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,7 +22,7 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 100000 +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) { @@ -114,27 +114,25 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) uint32_t amdgpu_virt_kiq_rreg(struct 
amdgpu_device *adev, uint32_t reg) { signed long r; - uint32_t val; - struct dma_fence *f; + unsigned long flags; + uint32_t val, seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&kiq->ring_mutex); + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg); - amdgpu_fence_emit(ring, &f); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - mutex_unlock(&kiq->ring_mutex); + spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); - dma_fence_put(f); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); + DRM_ERROR("wait for kiq fence error: %ld\n", r); return ~0; } - val = adev->wb.wb[adev->virt.reg_val_offs]; return val; @@ -143,23 +141,23 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { signed long r; - struct dma_fence *f; + unsigned long flags; + uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&kiq->ring_mutex); + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit(ring, &f); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - mutex_unlock(&kiq->ring_mutex); + spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - dma_fence_put(f); + DRM_ERROR("wait for kiq fence error: %ld\n", r); } /** @@ -274,3 +272,80 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) (void *)&adev->virt.mm_table.cpu_addr); adev->virt.mm_table.gpu_addr = 0; } + + +int amdgpu_virt_fw_reserve_get_checksum(void *obj, + unsigned long obj_size, + unsigned int key, + unsigned int chksum) +{ + unsigned int ret = key; + unsigned long i = 0; + unsigned char *pos; + + pos = (char *)obj; + /* calculate checksum */ + for (i = 0; i < obj_size; ++i) + ret += *(pos + i); + /* minus the chksum itself */ + pos = (char *)&chksum; + for (i = 0; i < sizeof(chksum); ++i) + ret -= *(pos + i); + return ret; +} + +void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) +{ + uint32_t pf2vf_ver = 0; + uint32_t pf2vf_size = 0; + uint32_t checksum = 0; + uint32_t checkval; + char *str; + + adev->virt.fw_reserve.p_pf2vf = NULL; + adev->virt.fw_reserve.p_vf2pf = NULL; + + if (adev->fw_vram_usage.va != NULL) { + adev->virt.fw_reserve.p_pf2vf = + (struct amdgim_pf2vf_info_header *)( + adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET); + pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version; + AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); + AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); + + /* pf2vf message must be in 4K */ + if (pf2vf_size > 0 && pf2vf_size < 4096) { + checkval = amdgpu_virt_fw_reserve_get_checksum( + adev->virt.fw_reserve.p_pf2vf, pf2vf_size, + adev->virt.fw_reserve.checksum_key, checksum); + if (checkval == checksum) { + adev->virt.fw_reserve.p_vf2pf = + ((void *)adev->virt.fw_reserve.p_pf2vf + + pf2vf_size); + memset((void *)adev->virt.fw_reserve.p_vf2pf, 0, + sizeof(amdgim_vf2pf_info)); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, 
header.version, + AMDGPU_FW_VRAM_VF2PF_VER); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size, + sizeof(amdgim_vf2pf_info)); + AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version, + &str); +#ifdef MODULE + if (THIS_MODULE->version != NULL) + strcpy(str, THIS_MODULE->version); + else +#endif + strcpy(str, "N/A"); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert, + 0); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum, + amdgpu_virt_fw_reserve_get_checksum( + adev->virt.fw_reserve.p_vf2pf, + pf2vf_size, + adev->virt.fw_reserve.checksum_key, 0)); + } + } + } +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index e5fd0ff6b29d..b89d37fc406f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -58,6 +58,179 @@ struct amdgpu_virt_ops { void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); }; +/* + * Firmware Reserve Frame buffer + */ +struct amdgpu_virt_fw_reserve { + struct amdgim_pf2vf_info_header *p_pf2vf; + struct amdgim_vf2pf_info_header *p_vf2pf; + unsigned int checksum_key; +}; +/* + * Defination between PF and VF + * Structures forcibly aligned to 4 to keep the same style as PF. + */ +#define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024) + +#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \ + (total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2)) + +enum AMDGIM_FEATURE_FLAG { + /* GIM supports feature of Error log collecting */ + AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1, + /* GIM supports feature of loading uCodes */ + AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, +}; + +struct amdgim_pf2vf_info_header { + /* the total structure size in byte. */ + uint32_t size; + /* version of this structure, written by the GIM */ + uint32_t version; +} __aligned(4); +struct amdgim_pf2vf_info_v1 { + /* header contains size and version */ + struct amdgim_pf2vf_info_header header; + /* max_width * max_height */ + unsigned int uvd_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + unsigned int uvd_enc_max_bandwidth; + /* max_width * max_height */ + unsigned int vce_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + unsigned int vce_enc_max_bandwidth; + /* MEC FW position in kb from the start of visible frame buffer */ + unsigned int mecfw_kboffset; + /* The features flags of the GIM driver supports. */ + unsigned int feature_flags; + /* use private key from mailbox 2 to create chueksum */ + unsigned int checksum; +} __aligned(4); + +struct amdgim_pf2vf_info_v2 { + /* header contains size and version */ + struct amdgim_pf2vf_info_header header; + /* use private key from mailbox 2 to create chueksum */ + uint32_t checksum; + /* The features flags of the GIM driver supports. 
*/ + uint32_t feature_flags; + /* max_width * max_height */ + uint32_t uvd_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + uint32_t uvd_enc_max_bandwidth; + /* max_width * max_height */ + uint32_t vce_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + uint32_t vce_enc_max_bandwidth; + /* MEC FW position in kb from the start of VF visible frame buffer */ + uint64_t mecfw_kboffset; + /* MEC FW size in KB */ + uint32_t mecfw_ksize; + /* UVD FW position in kb from the start of VF visible frame buffer */ + uint64_t uvdfw_kboffset; + /* UVD FW size in KB */ + uint32_t uvdfw_ksize; + /* VCE FW position in kb from the start of VF visible frame buffer */ + uint64_t vcefw_kboffset; + /* VCE FW size in KB */ + uint32_t vcefw_ksize; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)]; +} __aligned(4); + + +struct amdgim_vf2pf_info_header { + /* the total structure size in byte. */ + uint32_t size; + /*version of this structure, written by the guest */ + uint32_t version; +} __aligned(4); + +struct amdgim_vf2pf_info_v1 { + /* header contains size and version */ + struct amdgim_vf2pf_info_header header; + /* driver version */ + char driver_version[64]; + /* driver certification, 1=WHQL, 0=None */ + unsigned int driver_cert; + /* guest OS type and version: need a define */ + unsigned int os_info; + /* in the unit of 1M */ + unsigned int fb_usage; + /* guest gfx engine usage percentage */ + unsigned int gfx_usage; + /* guest gfx engine health percentage */ + unsigned int gfx_health; + /* guest compute engine usage percentage */ + unsigned int compute_usage; + /* guest compute engine health percentage */ + unsigned int compute_health; + /* guest vce engine usage percentage. 0xffff means N/A. */ + unsigned int vce_enc_usage; + /* guest vce engine health percentage. 0xffff means N/A. */ + unsigned int vce_enc_health; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + unsigned int uvd_enc_usage; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + unsigned int uvd_enc_health; + unsigned int checksum; +} __aligned(4); + +struct amdgim_vf2pf_info_v2 { + /* header contains size and version */ + struct amdgim_vf2pf_info_header header; + uint32_t checksum; + /* driver version */ + uint8_t driver_version[64]; + /* driver certification, 1=WHQL, 0=None */ + uint32_t driver_cert; + /* guest OS type and version: need a define */ + uint32_t os_info; + /* in the unit of 1M */ + uint32_t fb_usage; + /* guest gfx engine usage percentage */ + uint32_t gfx_usage; + /* guest gfx engine health percentage */ + uint32_t gfx_health; + /* guest compute engine usage percentage */ + uint32_t compute_usage; + /* guest compute engine health percentage */ + uint32_t compute_health; + /* guest vce engine usage percentage. 0xffff means N/A. */ + uint32_t vce_enc_usage; + /* guest vce engine health percentage. 0xffff means N/A. */ + uint32_t vce_enc_health; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + uint32_t uvd_enc_usage; + /* guest uvd engine usage percentage. 0xffff means N/A. 
*/ + uint32_t uvd_enc_health; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)]; +} __aligned(4); + +#define AMDGPU_FW_VRAM_VF2PF_VER 2 +typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; + +#define AMDGPU_FW_VRAM_VF2PF_WRITE(adev, field, val) \ + do { \ + ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field = (val); \ + } while (0) + +#define AMDGPU_FW_VRAM_VF2PF_READ(adev, field, val) \ + do { \ + (*val) = ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field; \ + } while (0) + +#define AMDGPU_FW_VRAM_PF2VF_READ(adev, field, val) \ + do { \ + if (!adev->virt.fw_reserve.p_pf2vf) \ + *(val) = 0; \ + else { \ + if (adev->virt.fw_reserve.p_pf2vf->version == 1) \ + *(val) = ((struct amdgim_pf2vf_info_v1 *)adev->virt.fw_reserve.p_pf2vf)->field; \ + if (adev->virt.fw_reserve.p_pf2vf->version == 2) \ + *(val) = ((struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf)->field; \ + } \ + } while (0) + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -72,6 +245,7 @@ struct amdgpu_virt { struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; + struct amdgpu_virt_fw_reserve fw_reserve; }; #define AMDGPU_CSA_SIZE (8 * 1024) @@ -114,5 +288,9 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); +int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, + unsigned int key, + unsigned int chksum); +void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fee0a32ac56f..c8c26f21993c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_SHADOW); if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_SYSTEM; + init_value = AMDGPU_PTE_DEFAULT_ATC; if (level != adev->vm_manager.num_level - 1) init_value |= AMDGPU_PDE_PTE; + } /* walk over the address space and allocate the page tables */ @@ -1034,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1175,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, parent->base.bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); if (shadow) amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, @@ -1243,7 +1244,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - int r; + int r = 0; spin_lock(&vm->status_lock); while (!list_empty(&vm->relocated)) { @@ -1643,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_free; r = amdgpu_sync_resv(adev, &job->sync, 
vm->root.base.bo->tbo.resv, - owner); + owner, false); if (r) goto error_free; @@ -1698,6 +1699,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct drm_mm_node *nodes, struct dma_fence **fence) { + unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size; uint64_t pfn, start = mapping->start; int r; @@ -1732,6 +1734,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } do { + dma_addr_t *dma_addr = NULL; uint64_t max_entries; uint64_t addr, last; @@ -1745,15 +1748,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } if (pages_addr) { + uint64_t count; + max_entries = min(max_entries, 16ull * 1024ull); - addr = 0; + for (count = 1; count < max_entries; ++count) { + uint64_t idx = pfn + count; + + if (pages_addr[idx] != + (pages_addr[idx - 1] + PAGE_SIZE)) + break; + } + + if (count < min_linear_pages) { + addr = pfn << PAGE_SHIFT; + dma_addr = pages_addr; + } else { + addr = pages_addr[pfn]; + max_entries = count; + } + } else if (flags & AMDGPU_PTE_VALID) { addr += adev->vm_manager.vram_base_offset; + addr += pfn << PAGE_SHIFT; } - addr += pfn << PAGE_SHIFT; last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm, + r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm, start, last, flags, addr, fence); if (r) @@ -2017,7 +2037,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, list_del(&mapping->list); if (vm->pte_support_ats) - init_pte_value = AMDGPU_PTE_SYSTEM; + init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, mapping->start, mapping->last, @@ -2629,7 +2649,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (adev->asic_type == CHIP_RAVEN) { vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + init_pde_value = AMDGPU_PTE_DEFAULT_ATC + | AMDGPU_PDE_PTE; + } } else vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2737,8 +2759,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + struct amdgpu_bo *root; u64 fault; - int i; + int i, r; /* Clear pending page faults from IH when the VM is destroyed */ while (kfifo_get(&vm->faults, &fault)) @@ -2773,7 +2796,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - amdgpu_vm_free_levels(&vm->root); + root = amdgpu_bo_ref(vm->root.base.bo); + r = amdgpu_bo_reserve(root, true); + if (r) { + dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); + } else { + amdgpu_vm_free_levels(&vm->root); + amdgpu_bo_unreserve(root); + } + amdgpu_bo_unref(&root); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vm_free_reserved_vmid(adev, vm, i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d68f39b4e5e7..aa914256b4bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry; #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) +/* For Raven */ +#define AMDGPU_MTYPE_CC 2 + +#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ + | AMDGPU_PTE_SNOOPED \ + | AMDGPU_PTE_EXECUTABLE \ + | AMDGPU_PTE_READABLE \ + | AMDGPU_PTE_WRITEABLE \ + | 
AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC)) + /* How to programm VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 #define AMDGPU_VM_FAULT_STOP_FIRST 1 diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 147e92b3a959..b8002ac3e536 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ +#include <linux/kernel.h> #include <linux/firmware.h> #include <drm/drmP.h> #include "amdgpu.h" @@ -3952,10 +3953,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes >> 2, unique_indices, &indices_count, - sizeof(unique_indices) / sizeof(int), + ARRAY_SIZE(unique_indices), indirect_start_offsets, &offset_count, - sizeof(indirect_start_offsets)/sizeof(int)); + ARRAY_SIZE(indirect_start_offsets)); /* save and restore list */ WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); @@ -3977,14 +3978,14 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) /* starting offsets starts */ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.starting_offsets_start); - for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(mmRLC_GPM_SCRATCH_DATA, indirect_start_offsets[i]); /* unique indices */ temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; data = mmRLC_SRM_INDEX_CNTL_DATA_0; - for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { + for (i = 0; i < ARRAY_SIZE(unique_indices); i++) { if (unique_indices[i] != 0) { WREG32(temp + i, unique_indices[i] & 0x3FFFF); WREG32(data + i, unique_indices[i] >> 20); @@ -6394,6 +6395,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } +static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(&adev->gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { + /* Clear all reservations - everyone reacquires all resources */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], + true); + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], + true); + } else { + /* Lower all pipes without a current reservation */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + iring = &adev->gfx.gfx_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + iring = &adev->gfx.compute_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + } + + mutex_unlock(&adev->gfx.pipe_reserve_mutex); +} + +static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + uint32_t pipe_priority = acquire ? 0x2 : 0x0; + uint32_t queue_priority = acquire ? 0xf : 0x0; + + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); + + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} +static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + struct amdgpu_device *adev = ring->adev; + bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW; + + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return; + + gfx_v8_0_hqd_set_priority(adev, ring, acquire); + gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); +} + static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) @@ -6839,6 +6938,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .set_priority = gfx_v8_0_ring_set_priority_compute, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 99a5b3b92e8e..7f15bb2c5233 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
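Tying the pieces together: on gfx_v8_0 only AMD_SCHED_PRIORITY_HIGH_HW acquires the HQD boost and the pipe reservation, and per amdgpu_to_sched_priority() earlier in this diff only AMDGPU_CTX_PRIORITY_VERY_HIGH maps to that level, so lesser "high" requests remain software-scheduler only. A small hedged helper restating that relationship; it is not part of the patch, merely a synthesis of the two mappings shown above:

    /* Sketch: does a uapi context priority end up with the VI compute
     * hardware boost?  Follows amdgpu_to_sched_priority() and the
     * acquire test in gfx_v8_0_ring_set_priority_compute().
     */
    static bool example_gets_vi_hw_boost(int ctx_priority)
    {
            return amdgpu_to_sched_priority(ctx_priority) ==
                   AMD_SCHED_PRIORITY_HIGH_HW; /* only AMDGPU_CTX_PRIORITY_VERY_HIGH */
    }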
* */ +#include <linux/kernel.h> #include <linux/firmware.h> #include <drm/drmP.h> #include "amdgpu.h" @@ -1730,10 +1731,10 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes >> 2, unique_indirect_regs, &unique_indirect_reg_count, - sizeof(unique_indirect_regs)/sizeof(int), + ARRAY_SIZE(unique_indirect_regs), indirect_start_offsets, &indirect_start_offsets_count, - sizeof(indirect_start_offsets)/sizeof(int)); + ARRAY_SIZE(indirect_start_offsets)); /* enable auto inc in case it is disabled */ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); @@ -1770,12 +1771,12 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) /* write the starting offsets to RLC scratch ram */ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), adev->gfx.rlc.starting_offsets_start); - for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), indirect_start_offsets[i]); /* load unique indirect regs*/ - for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) { + for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, unique_indirect_regs[i] & 0x3FFFF); WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2812d88a8bdd..b4906d2f30d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -183,6 +183,12 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); return r; } + /* Retrieve checksum from mailbox2 */ + if (req == IDH_REQ_GPU_INIT_ACCESS) { + adev->virt.fw_reserve.checksum_key = + RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2)); + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index dea7c909ca5f..4e20d91d5d50 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -257,6 +257,9 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, unsigned int psp_write_ptr_reg = 0; struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; struct amdgpu_device *adev = psp->adev; uint32_t ring_size_dw = ring->ring_size / 4; uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; @@ -266,9 +269,16 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, /* Update KM RB frame pointer to new frame */ if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring->ring_mem; + write_frame = ring_buffer_start; else - write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw); + write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } /* Initialize KM RB frame */ memset(write_frame, 0, 
sizeof(struct psp_gfx_rb_frame)); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index cee5c396b277..c7bcfe8e286c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -367,6 +367,9 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, unsigned int psp_write_ptr_reg = 0; struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; struct amdgpu_device *adev = psp->adev; uint32_t ring_size_dw = ring->ring_size / 4; uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; @@ -378,9 +381,16 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, /* write_frame ptr increments by size of rb_frame in bytes */ /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring->ring_mem; + write_frame = ring_buffer_start; else - write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw); + write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } /* Initialize KM RB frame */ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6705954a985d..4e67fe1e7955 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -279,10 +279,7 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) } static u32 soc15_get_xclk(struct amdgpu_device *adev) { - if (adev->asic_type == CHIP_VEGA10) - return adev->clock.spll.reference_freq/4; - else - return adev->clock.spll.reference_freq; + return adev->clock.spll.reference_freq; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 60af7310a234..920910ac8663 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -268,8 +268,9 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) +static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, + uint32_t handle, + bool direct, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; @@ -370,6 +371,10 @@ static int uvd_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (!(adev->flags & AMD_IS_APU) && + (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK)) + return -ENOENT; + uvd_v6_0_set_ring_funcs(adev); if (uvd_v6_0_enc_support(adev)) { @@ -564,11 +569,7 @@ static int uvd_v6_0_suspend(void *handle) if (r) return r; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) - r = amdgpu_uvd_suspend(adev); - - return r; + return amdgpu_uvd_suspend(adev); } static int uvd_v6_0_resume(void *handle) @@ -576,12 +577,10 @@ static int uvd_v6_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device 
*)handle; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_uvd_resume(adev); - if (r) - return r; - } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + return uvd_v6_0_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b8ed8faf2003..6634545060fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -592,11 +592,7 @@ static int uvd_v7_0_suspend(void *handle) if (r) return r; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) - r = amdgpu_uvd_suspend(adev); - - return r; + return amdgpu_uvd_suspend(adev); } static int uvd_v7_0_resume(void *handle) @@ -604,12 +600,10 @@ static int uvd_v7_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_uvd_resume(adev); - if (r) - return r; - } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + return uvd_v7_0_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 90332f55cfba..cf81065e3c5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -365,15 +365,10 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) { u32 tmp; - /* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */ if ((adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY) || - (adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) + (adev->asic_type == CHIP_STONEY)) return AMDGPU_VCE_HARVEST_VCE1; - /* Tonga and CZ are dual or single pipe */ if (adev->flags & AMD_IS_APU) tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) & VCE_HARVEST_FUSE_MACRO__MASK) >> @@ -391,6 +386,11 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) case 3: return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1; default: + if ((adev->asic_type == CHIP_POLARIS10) || + (adev->asic_type == CHIP_POLARIS11) || + (adev->asic_type == CHIP_POLARIS12)) + return AMDGPU_VCE_HARVEST_VCE1; + return 0; } } diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index e13c67c8d2c0..bc5a2945bd2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,6 +4,6 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" - depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64 + depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index b400d5664252..7bb0bc0ca3d6 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for Heterogenous System Architecture support for AMD GPU devices # diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 211fc48697fa..3d5ccb3755d4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, /* Do not process in ISR, just request it to be forwarded to WQ. 
*/ return (pasid != 0) && (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); } @@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + uint32_t context_id = ihre->data & 0xfffffff; pasid = (ihre->ring_id & 0xffff0000) >> 16; @@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, return; if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, 0, 0); + kfd_signal_event_interrupt(pasid, context_id, 28); + else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id, 28); else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); + kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index 79a16d24c1b8..109298b9d507 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -32,9 +32,10 @@ struct cik_ih_ring_entry { uint32_t reserved; }; -#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 #define CIK_INTSRC_CP_BAD_OPCODE 0xB7 +#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 +#define CIK_INTSRC_SDMA_TRAP 0xE0 #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 660b3fbade41..505d39156acd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -282,8 +282,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, - 0, q_properties.type, &queue_id); + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id); if (err != 0) goto err_create_queue; @@ -451,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, return -EINVAL; } - mutex_lock(kfd_get_dbgmgr_mutex()); mutex_lock(&p->mutex); + mutex_lock(kfd_get_dbgmgr_mutex()); /* * make sure that we have pdd, if this the first queue created for @@ -480,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, } out: - mutex_unlock(&p->mutex); mutex_unlock(kfd_get_dbgmgr_mutex()); + mutex_unlock(&p->mutex); return status; } @@ -836,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, void *data) { struct kfd_ioctl_wait_events_args *args = data; - enum kfd_event_wait_result wait_result; int err; err = kfd_wait_on_events(p, args->num_events, (void __user *)args->events_ptr, (args->wait_for_all != 0), - args->timeout, &wait_result); - - args->wait_result = wait_result; + args->timeout, &args->wait_result); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 0aa021aa0aa1..c407f6bd9956 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -184,9 +184,10 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) struct kernel_queue *kq = NULL; int status; + properties.type = KFD_QUEUE_TYPE_DIQ; + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, 0, KFD_QUEUE_TYPE_DIQ, - 
&qid); + &properties, &qid); if (status) { pr_err("Failed to create DIQ\n"); @@ -769,13 +770,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) union GRBM_GFX_INDEX_BITS reg_gfx_index; struct kfd_process_device *pdd; struct dbg_wave_control_info wac_info; - int temp; - int first_vmid_to_scan = 8; - int last_vmid_to_scan = 15; - - first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1; - temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan; - last_vmid_to_scan = first_vmid_to_scan + ffz(temp); + int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; + int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; reg_sq_cmd.u32All = 0; status = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5df12b287201..621a3b53a038 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -92,6 +92,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); static void kfd_gtt_sa_fini(struct kfd_dev *kfd); +static int kfd_resume(struct kfd_dev *kfd); + static const struct kfd_device_info *lookup_device_info(unsigned short did) { size_t i; @@ -169,15 +171,8 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) (unsigned int)(1 << kfd->device_info->max_pasid_bits), iommu_info.max_pasids); - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err < 0) { - dev_err(kfd_device, "error initializing iommu device\n"); - return false; - } - if (!kfd_set_pasid_limit(pasid_limit)) { dev_err(kfd_device, "error setting pasid limit\n"); - amd_iommu_free_device(kfd->pdev); return false; } @@ -189,7 +184,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); if (dev) - kfd_unbind_process_from_device(dev, pasid); + kfd_process_iommu_unbind_callback(dev, pasid); } /* @@ -224,6 +219,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->shared_resources = *gpu_resources; + kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd + - kfd->vm_info.first_vmid_kfd + 1; + /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; @@ -273,29 +273,22 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_interrupt_error; } - if (!device_iommu_pasid_init(kfd)) { - dev_err(kfd_device, - "Error initializing iommuv2 for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); - goto device_iommu_pasid_error; - } - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); - kfd->dqm = device_queue_manager_init(kfd); if (!kfd->dqm) { dev_err(kfd_device, "Error initializing queue manager\n"); goto device_queue_manager_error; } - if (kfd->dqm->ops.start(kfd->dqm)) { + if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, - "Error starting queue manager for device %x:%x\n", + "Error initializing iommuv2 for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - goto dqm_start_error; + goto device_iommu_pasid_error; } + if (kfd_resume(kfd)) + goto kfd_resume_error; + kfd->dbgmgr = NULL; kfd->init_complete = true; @@ -307,11 +300,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto out; -dqm_start_error: +kfd_resume_error: +device_iommu_pasid_error: 
device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: - amd_iommu_free_device(kfd->pdev); -device_iommu_pasid_error: kfd_interrupt_exit(kfd); kfd_interrupt_error: kfd_topology_remove_device(kfd); @@ -331,8 +323,8 @@ out: void kgd2kfd_device_exit(struct kfd_dev *kfd) { if (kfd->init_complete) { + kgd2kfd_suspend(kfd); device_queue_manager_uninit(kfd->dqm); - amd_iommu_free_device(kfd->pdev); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); kfd_doorbell_fini(kfd); @@ -345,35 +337,59 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) void kgd2kfd_suspend(struct kfd_dev *kfd) { - if (kfd->init_complete) { - kfd->dqm->ops.stop(kfd->dqm); - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); - } + if (!kfd->init_complete) + return; + + kfd->dqm->ops.stop(kfd->dqm); + + kfd_unbind_processes_from_device(kfd); + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); } int kgd2kfd_resume(struct kfd_dev *kfd) { - unsigned int pasid_limit; - int err; + if (!kfd->init_complete) + return 0; - pasid_limit = kfd_get_pasid_limit(); + return kfd_resume(kfd); - if (kfd->init_complete) { - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err < 0) { - dev_err(kfd_device, "failed to initialize iommu\n"); - return -ENXIO; - } +} + +static int kfd_resume(struct kfd_dev *kfd) +{ + int err = 0; + unsigned int pasid_limit = kfd_get_pasid_limit(); - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); - kfd->dqm->ops.start(kfd->dqm); + err = amd_iommu_init_device(kfd->pdev, pasid_limit); + if (err) + return -ENXIO; + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, + iommu_invalid_ppr_cb); + + err = kfd_bind_processes_to_device(kfd); + if (err) + goto processes_bind_error; + + err = kfd->dqm->ops.start(kfd->dqm); + if (err) { + dev_err(kfd_device, + "Error starting queue manager for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); + goto dqm_start_error; } - return 0; + return err; + +dqm_start_error: +processes_bind_error: + amd_iommu_free_device(kfd->pdev); + + return err; } /* This is called directly from KGD at ISR. 
*/ @@ -387,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry) && enqueue_ih_ring_entry(kfd, ih_ring_entry)) - schedule_work(&kfd->interrupt_work); + queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock(&kfd->interrupt_lock); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 53a66e821624..e202921c150e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -44,9 +44,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); -static int destroy_queues_cpsch(struct device_queue_manager *dqm, - bool preempt_static_queues, bool lock); +static int execute_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param); +static int unmap_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param); + +static int map_queues_cpsch(struct device_queue_manager *dqm); static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, @@ -113,11 +118,11 @@ static int allocate_vmid(struct device_queue_manager *dqm, if (dqm->vmid_bitmap == 0) return -ENOMEM; - bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM); + bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, + dqm->dev->vm_info.vmid_num_kfd); clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap); - /* Kaveri kfd vmid's starts from vmid 8 */ - allocated_vmid = bit + KFD_VMID_START_OFFSET; + allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; pr_debug("vmid allocation %d\n", allocated_vmid); qpd->vmid = allocated_vmid; q->properties.vmid = allocated_vmid; @@ -132,7 +137,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { - int bit = qpd->vmid - KFD_VMID_START_OFFSET; + int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -184,6 +189,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, } list_add(&q->list, &qpd->queues_list); + qpd->queue_count++; if (q->properties.is_active) dqm->queue_count++; @@ -273,6 +279,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, dqm->dev->kfd2kgd->set_scratch_backing_va( dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + if (!q->properties.is_active) + return 0; + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); if (retval) @@ -288,65 +297,74 @@ out_deallocate_hqd: return retval; } -static int destroy_queue_nocpsch(struct device_queue_manager *dqm, +/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked + * to avoid asynchronized access + */ +static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { int retval; struct mqd_manager *mqd; - retval = 0; - - mutex_lock(&dqm->lock); + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) + return -ENOMEM; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (mqd == NULL) { - retval = -ENOMEM; - 
goto out; - } deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (mqd == NULL) { - retval = -ENOMEM; - goto out; - } dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); } else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); - retval = -EINVAL; - goto out; + return -EINVAL; } + dqm->total_queue_count--; retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, - QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); - - if (retval) - goto out; + if (retval == -ETIME) + qpd->reset_wavefronts = true; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); list_del(&q->list); - if (list_empty(&qpd->queues_list)) + if (list_empty(&qpd->queues_list)) { + if (qpd->reset_wavefronts) { + pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", + dqm->dev); + /* dbgdev_wave_reset_wavefronts has to be called before + * deallocate_vmid(), i.e. when vmid is still in use. + */ + dbgdev_wave_reset_wavefronts(dqm->dev, + qpd->pqm->process); + qpd->reset_wavefronts = false; + } + deallocate_vmid(dqm, qpd, q); + } + qpd->queue_count--; if (q->properties.is_active) dqm->queue_count--; - /* - * Unconditionally decrement this counter, regardless of the queue's - * type - */ - dqm->total_queue_count--; - pr_debug("Total of %d queues are accountable so far\n", - dqm->total_queue_count); + return retval; +} -out: +static int destroy_queue_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + int retval; + + mutex_lock(&dqm->lock); + retval = destroy_queue_nocpsch_locked(dqm, qpd, q); mutex_unlock(&dqm->lock); + return retval; } @@ -364,29 +382,56 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) goto out_unlock; } - if (q->properties.is_active) - prev_active = true; + /* Save previous activity state for counters */ + prev_active = q->properties.is_active; + + /* Make sure the queue is unmapped before updating the MQD */ + if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { + retval = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (retval) { + pr_err("unmap queue failed\n"); + goto out_unlock; + } + } else if (prev_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); + if (retval) { + pr_err("destroy mqd failed\n"); + goto out_unlock; + } + } + + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); /* - * - * check active state vs. the previous state - * and modify counter accordingly + * check active state vs. the previous state and modify + * counter accordingly. map_queues_cpsch uses the + * dqm->queue_count to determine whether a new runlist must be + * uploaded. 
*/ - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); - if ((q->properties.is_active) && (!prev_active)) + if (q->properties.is_active && !prev_active) dqm->queue_count++; else if (!q->properties.is_active && prev_active) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) - retval = execute_queues_cpsch(dqm, false); + retval = map_queues_cpsch(dqm); + else if (q->properties.is_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, + &q->properties, q->process->mm); out_unlock: mutex_unlock(&dqm->lock); return retval; } -static struct mqd_manager *get_mqd_manager_nocpsch( +static struct mqd_manager *get_mqd_manager( struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) { struct mqd_manager *mqd; @@ -407,7 +452,7 @@ static struct mqd_manager *get_mqd_manager_nocpsch( return mqd; } -static int register_process_nocpsch(struct device_queue_manager *dqm, +static int register_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; @@ -422,7 +467,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, mutex_lock(&dqm->lock); list_add(&n->list, &dqm->queues); - retval = dqm->ops_asic_specific.register_process(dqm, qpd); + retval = dqm->asic_ops.update_qpd(dqm, qpd); dqm->processes_count++; @@ -431,7 +476,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, return retval; } -static int unregister_process_nocpsch(struct device_queue_manager *dqm, +static int unregister_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { int retval; @@ -507,13 +552,13 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->allocated_queues[pipe] |= 1 << queue; } - dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; + dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; return 0; } -static void uninitialize_nocpsch(struct device_queue_manager *dqm) +static void uninitialize(struct device_queue_manager *dqm) { int i; @@ -577,14 +622,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, if (retval) return retval; - q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; - q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; + q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); - dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) @@ -613,8 +658,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) int i, mec; struct scheduling_resources res; - res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; - res.vmid_mask <<= KFD_VMID_START_OFFSET; + res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; res.queue_mask = 0; for (i = 0; i < KGD_MAX_QUEUES; ++i) { @@ -652,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm) static int initialize_cpsch(struct device_queue_manager *dqm) { - int retval; - pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); mutex_init(&dqm->lock); @@ -661,16 +703,13 @@ static int initialize_cpsch(struct 
device_queue_manager *dqm) dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; - retval = dqm->ops_asic_specific.initialize(dqm); - if (retval) - mutex_destroy(&dqm->lock); + dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; - return retval; + return 0; } static int start_cpsch(struct device_queue_manager *dqm) { - struct device_process_node *node; int retval; retval = 0; @@ -697,12 +736,9 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); - list_for_each_entry(node, &dqm->queues, list) - if (node->qpd->pqm->process && dqm->dev) - kfd_bind_process_to_device(dqm->dev, - node->qpd->pqm->process); - - execute_queues_cpsch(dqm, true); + mutex_lock(&dqm->lock); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + mutex_unlock(&dqm->lock); return 0; fail_allocate_vidmem: @@ -714,15 +750,10 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { - struct device_process_node *node; - struct kfd_process_device *pdd; - - destroy_queues_cpsch(dqm, true, true); + mutex_lock(&dqm->lock); + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + mutex_unlock(&dqm->lock); - list_for_each_entry(node, &dqm->queues, list) { - pdd = qpd_to_pdd(node->qpd); - pdd->bound = false; - } kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); @@ -752,7 +783,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, list_add(&kq->list, &qpd->priv_queue_list); dqm->queue_count++; qpd->is_debug = true; - execute_queues_cpsch(dqm, false); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); mutex_unlock(&dqm->lock); return 0; @@ -763,12 +794,10 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { mutex_lock(&dqm->lock); - /* here we actually preempt the DIQ */ - destroy_queues_cpsch(dqm, true, false); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; - execute_queues_cpsch(dqm, false); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); /* * Unconditionally decrement this counter, regardless of the queue's * type. 
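Among the kfd_device_queue_manager.c changes in this patch, SDMA queue allocation now derives the engine with a modulo and the per-engine queue slot with a division (sdma_engine_id = sdma_id % CIK_SDMA_QUEUES_PER_ENGINE, sdma_queue_id = sdma_id / CIK_SDMA_QUEUES_PER_ENGINE). A minimal stand-alone sketch of that mapping, not part of the patch and only borrowing the CIK_SDMA_* constants from kfd_device_queue_manager.h:

#include <stdio.h>

/* Values as defined in kfd_device_queue_manager.h */
#define CIK_SDMA_QUEUES            4
#define CIK_SDMA_QUEUES_PER_ENGINE 2

int main(void)
{
	unsigned int sdma_id;

	for (sdma_id = 0; sdma_id < CIK_SDMA_QUEUES; sdma_id++) {
		/* Illustrative sketch of the mapping used by the patch:
		 * engine chosen by modulo, slot within the engine by division.
		 */
		unsigned int engine_id = sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
		unsigned int queue_id = sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;

		printf("sdma_id %u -> engine %u, queue %u\n",
		       sdma_id, engine_id, queue_id);
	}
	return 0;
}

Compiled and run, this prints engine 0, 1, 0, 1 for sdma_id 0 through 3, i.e. successive ids map to alternating engines rather than filling engine 0 first as the old division-based engine selection did.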
@@ -779,14 +808,6 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, mutex_unlock(&dqm->lock); } -static void select_sdma_engine_id(struct queue *q) -{ - static int sdma_id; - - q->sdma_id = sdma_id; - sdma_id = (sdma_id + 1) % 2; -} - static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid) { @@ -807,9 +828,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, goto out; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - select_sdma_engine_id(q); - + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (retval) + goto out; + q->properties.sdma_queue_id = + q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = + q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + } mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -818,16 +845,18 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, goto out; } - dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) goto out; list_add(&q->list, &qpd->queues_list); + qpd->queue_count++; if (q->properties.is_active) { dqm->queue_count++; - retval = execute_queues_cpsch(dqm, false); + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -848,12 +877,12 @@ out: int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, - unsigned long timeout) + unsigned int timeout_ms) { - timeout += jiffies; + unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; while (*fence_addr != fence_value) { - if (time_after(jiffies, timeout)) { + if (time_after(jiffies, end_jiffies)) { pr_err("qcm fence wait loop timeout expired\n"); return -ETIME; } @@ -863,44 +892,57 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, return 0; } -static int destroy_sdma_queues(struct device_queue_manager *dqm, +static int unmap_sdma_queues(struct device_queue_manager *dqm, unsigned int sdma_engine) { return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, sdma_engine); } -static int destroy_queues_cpsch(struct device_queue_manager *dqm, - bool preempt_static_queues, bool lock) +/* dqm->lock mutex has to be locked before calling this function */ +static int map_queues_cpsch(struct device_queue_manager *dqm) { int retval; - enum kfd_preempt_type_filter preempt_type; - struct kfd_process_device *pdd; - retval = 0; + if (dqm->queue_count <= 0 || dqm->processes_count <= 0) + return 0; + + if (dqm->active_runlist) + return 0; + + retval = pm_send_runlist(&dqm->packets, &dqm->queues); + if (retval) { + pr_err("failed to execute runlist\n"); + return retval; + } + dqm->active_runlist = true; + + return retval; +} + +/* dqm->lock mutex has to be locked before calling this function */ +static int unmap_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param) +{ + int retval = 0; - if (lock) - mutex_lock(&dqm->lock); if (!dqm->active_runlist) - goto out; + return retval; pr_debug("Before destroying queues, sdma queue count is : %u\n", dqm->sdma_queue_count); if (dqm->sdma_queue_count > 0) { - destroy_sdma_queues(dqm, 0); - 
destroy_sdma_queues(dqm, 1); + unmap_sdma_queues(dqm, 0); + unmap_sdma_queues(dqm, 1); } - preempt_type = preempt_static_queues ? - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES : - KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES; - retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, - preempt_type, 0, false, 0); + filter, filter_param, false, 0); if (retval) - goto out; + return retval; *dqm->fence_addr = KFD_FENCE_INIT; pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, @@ -908,55 +950,29 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - if (retval) { - pdd = kfd_get_process_device_data(dqm->dev, - kfd_get_process(current)); - pdd->reset_wavefronts = true; - goto out; - } + if (retval) + return retval; + pm_release_ib(&dqm->packets); dqm->active_runlist = false; -out: - if (lock) - mutex_unlock(&dqm->lock); return retval; } -static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) +/* dqm->lock mutex has to be locked before calling this function */ +static int execute_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param) { int retval; - if (lock) - mutex_lock(&dqm->lock); - - retval = destroy_queues_cpsch(dqm, false, false); - if (retval) { - pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); - goto out; - } - - if (dqm->queue_count <= 0 || dqm->processes_count <= 0) { - retval = 0; - goto out; - } - - if (dqm->active_runlist) { - retval = 0; - goto out; - } - - retval = pm_send_runlist(&dqm->packets, &dqm->queues); + retval = unmap_queues_cpsch(dqm, filter, filter_param); if (retval) { - pr_err("failed to execute runlist"); - goto out; + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); + return retval; } - dqm->active_runlist = true; -out: - if (lock) - mutex_unlock(&dqm->lock); - return retval; + return map_queues_cpsch(dqm); } static int destroy_queue_cpsch(struct device_queue_manager *dqm, @@ -991,14 +1007,20 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, goto failed; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } list_del(&q->list); + qpd->queue_count--; if (q->properties.is_active) dqm->queue_count--; - execute_queues_cpsch(dqm, false); + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (retval == -ETIME) + qpd->reset_wavefronts = true; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -1068,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit = limit >> 16; } - retval = dqm->ops_asic_specific.set_cache_memory_policy( + retval = dqm->asic_ops.set_cache_memory_policy( dqm, qpd, default_policy, @@ -1088,6 +1110,109 @@ out: return retval; } +static int process_termination_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q, *next; + struct device_process_node *cur, *next_dpn; + int retval = 0; + + mutex_lock(&dqm->lock); + + /* Clear all user mode queues */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + int ret; + + ret = destroy_queue_nocpsch_locked(dqm, qpd, q); + if (ret) + retval = ret; + } + + /* Unregister process */ + list_for_each_entry_safe(cur, next_dpn, 
&dqm->queues, list) { + if (qpd == cur->qpd) { + list_del(&cur->list); + kfree(cur); + dqm->processes_count--; + break; + } + } + + mutex_unlock(&dqm->lock); + return retval; +} + + +static int process_termination_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + int retval; + struct queue *q, *next; + struct kernel_queue *kq, *kq_next; + struct mqd_manager *mqd; + struct device_process_node *cur, *next_dpn; + enum kfd_unmap_queues_filter filter = + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + + retval = 0; + + mutex_lock(&dqm->lock); + + /* Clean all kernel queues */ + list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { + list_del(&kq->list); + dqm->queue_count--; + qpd->is_debug = false; + dqm->total_queue_count--; + filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; + } + + /* Clear all user mode queues */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count--; + + if (q->properties.is_active) + dqm->queue_count--; + + dqm->total_queue_count--; + } + + /* Unregister process */ + list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { + if (qpd == cur->qpd) { + list_del(&cur->list); + kfree(cur); + dqm->processes_count--; + break; + } + } + + retval = execute_queues_cpsch(dqm, filter, 0); + if (retval || qpd->reset_wavefronts) { + pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); + dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); + qpd->reset_wavefronts = false; + } + + /* lastly, free mqd resources */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) { + retval = -ENOMEM; + goto out; + } + list_del(&q->list); + qpd->queue_count--; + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + } + +out: + mutex_unlock(&dqm->lock); + return retval; +} + struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) { struct device_queue_manager *dqm; @@ -1109,13 +1234,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.stop = stop_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; - dqm->ops.register_process = register_process_nocpsch; - dqm->ops.unregister_process = unregister_process_nocpsch; - dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.get_mqd_manager = get_mqd_manager; + dqm->ops.register_process = register_process; + dqm->ops.unregister_process = unregister_process; + dqm->ops.uninitialize = uninitialize; dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.process_termination = process_termination_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1124,12 +1250,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; - dqm->ops.register_process = register_process_nocpsch; - dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.get_mqd_manager = get_mqd_manager; + dqm->ops.register_process = register_process; + dqm->ops.unregister_process = unregister_process; dqm->ops.initialize = 
initialize_nocpsch; - dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.process_termination = process_termination_nocpsch; break; default: pr_err("Invalid scheduling policy %d\n", sched_policy); @@ -1138,12 +1265,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) switch (dev->device_info->asic_family) { case CHIP_CARRIZO: - device_queue_manager_init_vi(&dqm->ops_asic_specific); + device_queue_manager_init_vi(&dqm->asic_ops); break; case CHIP_KAVERI: - device_queue_manager_init_cik(&dqm->ops_asic_specific); + device_queue_manager_init_cik(&dqm->asic_ops); break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + goto out_free; } if (!dqm->ops.initialize(dqm)) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index faf820a06400..5b77cb69f732 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -29,11 +29,9 @@ #include "kfd_priv.h" #include "kfd_mqd_manager.h" -#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500) -#define CIK_VMID_NUM (8) -#define KFD_VMID_START_OFFSET (8) -#define VMID_PER_DEVICE CIK_VMID_NUM -#define KFD_DQM_FIRST_PIPE (0) +#define KFD_UNMAP_LATENCY_MS (4000) +#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) + #define CIK_SDMA_QUEUES (4) #define CIK_SDMA_QUEUES_PER_ENGINE (2) #define CIK_SDMA_ENGINE_NUM (2) @@ -79,6 +77,8 @@ struct device_process_node { * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the * memory apertures. * + * @process_termination: Clears all process queues belonging to that device.
+ * */ struct device_queue_manager_ops { @@ -122,12 +122,14 @@ struct device_queue_manager_ops { enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + + int (*process_termination)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); }; struct device_queue_manager_asic_ops { - int (*register_process)(struct device_queue_manager *dqm, + int (*update_qpd)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); - int (*initialize)(struct device_queue_manager *dqm); bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, @@ -153,7 +155,7 @@ struct device_queue_manager_asic_ops { struct device_queue_manager { struct device_queue_manager_ops ops; - struct device_queue_manager_asic_ops ops_asic_specific; + struct device_queue_manager_asic_ops asic_ops; struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct packet_manager packets; @@ -176,8 +178,10 @@ struct device_queue_manager { bool active_runlist; }; -void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops); -void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); +void device_queue_manager_init_cik( + struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_vi( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 72c3cbabc0a7..28e48c90c596 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); -static int register_process_cik(struct device_queue_manager *dqm, +static int update_qpd_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd); -static int initialize_cpsch_cik(struct device_queue_manager *dqm); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops) +void device_queue_manager_init_cik( + struct device_queue_manager_asic_ops *asic_ops) { - ops->set_cache_memory_policy = set_cache_memory_policy_cik; - ops->register_process = register_process_cik; - ops->initialize = initialize_cpsch_cik; - ops->init_sdma_vm = init_sdma_vm; + asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; + asic_ops->update_qpd = update_qpd_cik; + asic_ops->init_sdma_vm = init_sdma_vm; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) @@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, return true; } -static int register_process_cik(struct device_queue_manager *dqm, +static int update_qpd_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct kfd_process_device *pdd; @@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } - -static int initialize_cpsch_cik(struct device_queue_manager *dqm) -{ - return 0; -} diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 40e9ddd096cd..2fbce57a2f21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); -static int register_process_vi(struct device_queue_manager *dqm, +static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd); -static int initialize_cpsch_vi(struct device_queue_manager *dqm); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) +void device_queue_manager_init_vi( + struct device_queue_manager_asic_ops *asic_ops) { - ops->set_cache_memory_policy = set_cache_memory_policy_vi; - ops->register_process = register_process_vi; - ops->initialize = initialize_cpsch_vi; - ops->init_sdma_vm = init_sdma_vm; + asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; + asic_ops->update_qpd = update_qpd_vi; + asic_ops->init_sdma_vm = init_sdma_vm; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) @@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, return true; } -static int register_process_vi(struct device_queue_manager *dqm, +static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct kfd_process_device *pdd; @@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } - -static int initialize_cpsch_vi(struct device_queue_manager *dqm) -{ - return 0; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 944abfad39c1..cb92d4b72400 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -24,8 +24,8 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/sched/signal.h> +#include <linux/sched/mm.h> #include <linux/uaccess.h> -#include <linux/mm.h> #include <linux/mman.h> #include <linux/memory.h> #include "kfd_priv.h" @@ -33,185 +33,89 @@ #include <linux/device.h> /* - * A task can only be on a single wait_queue at a time, but we need to support - * waiting on multiple events (any/all). - * Instead of each event simply having a wait_queue with sleeping tasks, it - * has a singly-linked list of tasks. - * A thread that wants to sleep creates an array of these, one for each event - * and adds one to each event's waiter chain. + * Wrapper around wait_queue_entry_t */ struct kfd_event_waiter { - struct list_head waiters; - struct task_struct *sleeping_task; - - /* Transitions to true when the event this belongs to is signaled. */ - bool activated; - - /* Event */ - struct kfd_event *event; - uint32_t input_index; + wait_queue_entry_t wait; + struct kfd_event *event; /* Event to wait for */ + bool activated; /* Becomes true when event is signaled */ }; /* - * Over-complicated pooled allocator for event notification slots. - * * Each signal event needs a 64-bit signal slot where the signaler will write - * a 1 before sending an interrupt.l (This is needed because some interrupts + * a 1 before sending an interrupt. 
(This is needed because some interrupts * do not contain enough spare data bits to identify an event.) - * We get whole pages from vmalloc and map them to the process VA. - * Individual signal events are then allocated a slot in a page. + * We get whole pages and map them to the process VA. + * Individual signal events use their event_id as slot index. */ - -struct signal_page { - struct list_head event_pages; /* kfd_process.signal_event_pages */ +struct kfd_signal_page { uint64_t *kernel_address; uint64_t __user *user_address; - uint32_t page_index; /* Index into the mmap aperture. */ - unsigned int free_slots; - unsigned long used_slot_bitmap[0]; }; -#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT -#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE) -#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1) -#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \ - SLOT_BITMAP_SIZE * sizeof(long)) - -/* - * For signal events, the event ID is used as the interrupt user data. - * For SQ s_sendmsg interrupts, this is limited to 8 bits. - */ - -#define INTERRUPT_DATA_BITS 8 -#define SIGNAL_EVENT_ID_SLOT_SHIFT 0 -static uint64_t *page_slots(struct signal_page *page) +static uint64_t *page_slots(struct kfd_signal_page *page) { return page->kernel_address; } -static bool allocate_free_slot(struct kfd_process *process, - struct signal_page **out_page, - unsigned int *out_slot_index) -{ - struct signal_page *page; - - list_for_each_entry(page, &process->signal_event_pages, event_pages) { - if (page->free_slots > 0) { - unsigned int slot = - find_first_zero_bit(page->used_slot_bitmap, - SLOTS_PER_PAGE); - - __set_bit(slot, page->used_slot_bitmap); - page->free_slots--; - - page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; - - *out_page = page; - *out_slot_index = slot; - - pr_debug("Allocated event signal slot in page %p, slot %d\n", - page, slot); - - return true; - } - } - - pr_debug("No free event signal slots were found for process %p\n", - process); - - return false; -} - -#define list_tail_entry(head, type, member) \ - list_entry((head)->prev, type, member) - -static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) +static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) { void *backing_store; - struct signal_page *page; + struct kfd_signal_page *page; - page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL); + page = kzalloc(sizeof(*page), GFP_KERNEL); if (!page) - goto fail_alloc_signal_page; + return NULL; - page->free_slots = SLOTS_PER_PAGE; - - backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + backing_store = (void *) __get_free_pages(GFP_KERNEL, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); if (!backing_store) goto fail_alloc_signal_store; - /* prevent user-mode info leaks */ + /* Initialize all events to unsignaled */ memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, - KFD_SIGNAL_EVENT_LIMIT * 8); + KFD_SIGNAL_EVENT_LIMIT * 8); page->kernel_address = backing_store; - - if (list_empty(&p->signal_event_pages)) - page->page_index = 0; - else - page->page_index = list_tail_entry(&p->signal_event_pages, - struct signal_page, - event_pages)->page_index + 1; - pr_debug("Allocated new event signal page at %p, for process %p\n", page, p); - pr_debug("Page index is %d\n", page->page_index); - list_add(&page->event_pages, &p->signal_event_pages); - - return true; + return page; fail_alloc_signal_store: kfree(page); -fail_alloc_signal_page: - return false; + return NULL; } -static bool allocate_event_notification_slot(struct file *devkfd, - struct kfd_process 
*p, - struct signal_page **page, - unsigned int *signal_slot_index) +static int allocate_event_notification_slot(struct kfd_process *p, + struct kfd_event *ev) { - bool ret; + int id; - ret = allocate_free_slot(p, page, signal_slot_index); - if (!ret) { - ret = allocate_signal_page(devkfd, p); - if (ret) - ret = allocate_free_slot(p, page, signal_slot_index); + if (!p->signal_page) { + p->signal_page = allocate_signal_page(p); + if (!p->signal_page) + return -ENOMEM; + /* Oldest user mode expects 256 event slots */ + p->signal_mapped_size = 256*8; } - return ret; -} - -/* Assumes that the process's event_mutex is locked. */ -static void release_event_notification_slot(struct signal_page *page, - size_t slot_index) -{ - __clear_bit(slot_index, page->used_slot_bitmap); - page->free_slots++; - - /* We don't free signal pages, they are retained by the process - * and reused until it exits. - */ -} - -static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, - unsigned int page_index) -{ - struct signal_page *page; - /* - * This is safe because we don't delete signal pages until the - * process exits. + * Compatibility with old user mode: Only use signal slots + * user mode has mapped, may be less than + * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase + * of the event limit without breaking user mode. */ - list_for_each_entry(page, &p->signal_event_pages, event_pages) - if (page->page_index == page_index) - return page; + id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8, + GFP_KERNEL); + if (id < 0) + return id; - return NULL; + ev->event_id = id; + page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT; + + return 0; } /* @@ -220,99 +124,81 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, */ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) { - struct kfd_event *ev; - - hash_for_each_possible(p->events, ev, events, id) - if (ev->event_id == id) - return ev; - - return NULL; + return idr_find(&p->event_idr, id); } -static u32 make_signal_event_id(struct signal_page *page, - unsigned int signal_slot_index) -{ - return page->page_index | - (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); -} - -/* - * Produce a kfd event id for a nonsignal event. - * These are arbitrary numbers, so we do a sequential search through - * the hash table for an unused number. +/** + * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID + * @p: Pointer to struct kfd_process + * @id: ID to look up + * @bits: Number of valid bits in @id + * + * Finds the first signaled event with a matching partial ID. If no + * matching signaled event is found, returns NULL. In that case the + * caller should assume that the partial ID is invalid and do an + * exhaustive search of all signaled events. + * + * If multiple events with the same partial ID signal at the same + * time, they will be found one interrupt at a time, not necessarily + * in the same order the interrupts occurred. As long as the number of + * interrupts is correct, all signaled events will be seen by the + * driver.
*/ -static u32 make_nonsignal_event_id(struct kfd_process *p) +static struct kfd_event *lookup_signaled_event_by_partial_id( + struct kfd_process *p, uint32_t id, uint32_t bits) { - u32 id; - - for (id = p->next_nonsignal_event_id; - id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); - id++) - ; + struct kfd_event *ev; - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { + if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT) + return NULL; - /* - * What if id == LAST_NONSIGNAL_EVENT_ID - 1? - * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so - * the first loop fails immediately and we proceed with the - * wraparound loop below. - */ - p->next_nonsignal_event_id = id + 1; + /* Fast path for the common case that @id is not a partial ID + * and we only need a single lookup. + */ + if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + return NULL; - return id; + return idr_find(&p->event_idr, id); } - for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; - id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id); - id++) - ; - + /* General case for partial IDs: Iterate over all matching IDs + * and find the first one that has signaled. + */ + for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + continue; - if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { - p->next_nonsignal_event_id = id + 1; - return id; + ev = idr_find(&p->event_idr, id); } - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; - return 0; -} - -static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, - struct signal_page *page, - unsigned int signal_slot) -{ - return lookup_event_by_id(p, make_signal_event_id(page, signal_slot)); + return ev; } static int create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev) { - if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { + int ret; + + if (p->signal_mapped_size && + p->signal_event_count == p->signal_mapped_size / 8) { if (!p->signal_event_limit_reached) { pr_warn("Signal event wasn't created because limit was reached\n"); p->signal_event_limit_reached = true; } - return -ENOMEM; + return -ENOSPC; } - if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, - &ev->signal_slot_index)) { + ret = allocate_event_notification_slot(p, ev); + if (ret) { pr_warn("Signal event wasn't created because out of kernel memory\n"); - return -ENOMEM; + return ret; } p->signal_event_count++; - ev->user_signal_address = - &ev->signal_page->user_address[ev->signal_slot_index]; - - ev->event_id = make_signal_event_id(ev->signal_page, - ev->signal_slot_index); - + ev->user_signal_address = &p->signal_page->user_address[ev->event_id]; pr_debug("Signal event number %zu created with id %d, address %p\n", p->signal_event_count, ev->event_id, ev->user_signal_address); @@ -320,16 +206,20 @@ static int create_signal_event(struct file *devkfd, return 0; } -/* - * No non-signal events are supported yet. - * We create them as events that never signal. - * Set event calls from user-mode are failed. - */ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) { - ev->event_id = make_nonsignal_event_id(p); - if (ev->event_id == 0) - return -ENOMEM; + /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an + * intentional integer overflow to -1 without a compiler + * warning. idr_alloc treats a negative value as "maximum + * signed integer". 
+ */ + int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, + (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, + GFP_KERNEL); + + if (id < 0) + return id; + ev->event_id = id; return 0; } @@ -337,50 +227,47 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) void kfd_event_init_process(struct kfd_process *p) { mutex_init(&p->event_mutex); - hash_init(p->events); - INIT_LIST_HEAD(&p->signal_event_pages); - p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_init(&p->event_idr); + p->signal_page = NULL; p->signal_event_count = 0; } static void destroy_event(struct kfd_process *p, struct kfd_event *ev) { - if (ev->signal_page) { - release_event_notification_slot(ev->signal_page, - ev->signal_slot_index); - p->signal_event_count--; - } + struct kfd_event_waiter *waiter; - /* - * Abandon the list of waiters. Individual waiting threads will - * clean up their own data. - */ - list_del(&ev->waiters); + /* Wake up pending waiters. They will return failure */ + list_for_each_entry(waiter, &ev->wq.head, wait.entry) + waiter->event = NULL; + wake_up_all(&ev->wq); + + if (ev->type == KFD_EVENT_TYPE_SIGNAL || + ev->type == KFD_EVENT_TYPE_DEBUG) + p->signal_event_count--; - hash_del(&ev->events); + idr_remove(&p->event_idr, ev->event_id); kfree(ev); } static void destroy_events(struct kfd_process *p) { struct kfd_event *ev; - struct hlist_node *tmp; - unsigned int hash_bkt; + uint32_t id; - hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) + idr_for_each_entry(&p->event_idr, ev, id) destroy_event(p, ev); + idr_destroy(&p->event_idr); } /* * We assume that the process is being destroyed and there is no need to * unmap the pages or keep bookkeeping data in order. */ -static void shutdown_signal_pages(struct kfd_process *p) +static void shutdown_signal_page(struct kfd_process *p) { - struct signal_page *page, *tmp; + struct kfd_signal_page *page = p->signal_page; - list_for_each_entry_safe(page, tmp, &p->signal_event_pages, - event_pages) { + if (page) { free_pages((unsigned long)page->kernel_address, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); kfree(page); @@ -390,7 +277,7 @@ static void shutdown_signal_pages(struct kfd_process *p) void kfd_event_free_process(struct kfd_process *p) { destroy_events(p); - shutdown_signal_pages(p); + shutdown_signal_page(p); } static bool event_can_be_gpu_signaled(const struct kfd_event *ev) @@ -419,7 +306,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ev->auto_reset = auto_reset; ev->signaled = false; - INIT_LIST_HEAD(&ev->waiters); + init_waitqueue_head(&ev->wq); *event_page_offset = 0; @@ -430,10 +317,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, case KFD_EVENT_TYPE_DEBUG: ret = create_signal_event(devkfd, p, ev); if (!ret) { - *event_page_offset = (ev->signal_page->page_index | - KFD_MMAP_EVENTS_MASK); + *event_page_offset = KFD_MMAP_EVENTS_MASK; *event_page_offset <<= PAGE_SHIFT; - *event_slot_index = ev->signal_slot_index; + *event_slot_index = ev->event_id; } break; default: @@ -442,8 +328,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, } if (!ret) { - hash_add(p->events, &ev->events, ev->event_id); - *event_id = ev->event_id; *event_trigger_data = ev->event_id; } else { @@ -477,19 +361,18 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id) static void set_event(struct kfd_event *ev) { struct kfd_event_waiter *waiter; - struct kfd_event_waiter *next; - /* Auto reset if the list is non-empty and we're waking someone. 
*/ - ev->signaled = !ev->auto_reset || list_empty(&ev->waiters); + /* Auto reset if the list is non-empty and we're waking + * someone. waitqueue_active is safe here because we're + * protected by the p->event_mutex, which is also held when + * updating the wait queues in kfd_wait_on_events. + */ + ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); - list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) { + list_for_each_entry(waiter, &ev->wq.head, wait.entry) waiter->activated = true; - /* _init because free_waiters will call list_del */ - list_del_init(&waiter->waiters); - - wake_up_process(waiter->sleeping_task); - } + wake_up_all(&ev->wq); } /* Assumes that p is current. */ @@ -538,13 +421,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) { - page_slots(ev->signal_page)[ev->signal_slot_index] = - UNSIGNALED_EVENT_SLOT; -} - -static bool is_slot_signaled(struct signal_page *page, unsigned int index) -{ - return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; + page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; } static void set_event_from_interrupt(struct kfd_process *p, @@ -559,7 +436,7 @@ static void set_event_from_interrupt(struct kfd_process *p, void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits) { - struct kfd_event *ev; + struct kfd_event *ev = NULL; /* * Because we are called from arbitrary context (workqueue) as opposed @@ -573,26 +450,46 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, mutex_lock(&p->event_mutex); - if (valid_id_bits >= INTERRUPT_DATA_BITS) { - /* Partial ID is a full ID. */ - ev = lookup_event_by_id(p, partial_id); + if (valid_id_bits) + ev = lookup_signaled_event_by_partial_id(p, partial_id, + valid_id_bits); + if (ev) { set_event_from_interrupt(p, ev); - } else { + } else if (p->signal_page) { /* - * Partial ID is in fact partial. For now we completely - * ignore it, but we could use any bits we did receive to - * search faster. + * Partial ID lookup failed. Assume that the event ID + * in the interrupt payload was invalid and do an + * exhaustive search of signaled events. */ - struct signal_page *page; - unsigned int i; - - list_for_each_entry(page, &p->signal_event_pages, event_pages) - for (i = 0; i < SLOTS_PER_PAGE; i++) - if (is_slot_signaled(page, i)) { - ev = lookup_event_by_page_slot(p, - page, i); + uint64_t *slots = page_slots(p->signal_page); + uint32_t id; + + if (valid_id_bits) + pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", + partial_id, valid_id_bits); + + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + /* With relatively few events, it's faster to + * iterate over the event IDR + */ + idr_for_each_entry(&p->event_idr, ev, id) { + if (id >= KFD_SIGNAL_EVENT_LIMIT) + break; + + if (slots[id] != UNSIGNALED_EVENT_SLOT) + set_event_from_interrupt(p, ev); + } + } else { + /* With relatively many events, it's faster to + * iterate over the signal slots and lookup + * only signaled events from the IDR. 
+ */ + for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) + if (slots[id] != UNSIGNALED_EVENT_SLOT) { + ev = lookup_event_by_id(p, id); set_event_from_interrupt(p, ev); } + } } mutex_unlock(&p->event_mutex); @@ -609,18 +506,16 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) GFP_KERNEL); for (i = 0; (event_waiters) && (i < num_events) ; i++) { - INIT_LIST_HEAD(&event_waiters[i].waiters); - event_waiters[i].sleeping_task = current; + init_wait(&event_waiters[i].wait); event_waiters[i].activated = false; } return event_waiters; } -static int init_event_waiter(struct kfd_process *p, +static int init_event_waiter_get_status(struct kfd_process *p, struct kfd_event_waiter *waiter, - uint32_t event_id, - uint32_t input_index) + uint32_t event_id) { struct kfd_event *ev = lookup_event_by_id(p, event_id); @@ -628,38 +523,60 @@ static int init_event_waiter(struct kfd_process *p, return -EINVAL; waiter->event = ev; - waiter->input_index = input_index; waiter->activated = ev->signaled; ev->signaled = ev->signaled && !ev->auto_reset; - list_add(&waiter->waiters, &ev->waiters); - return 0; } -static bool test_event_condition(bool all, uint32_t num_events, +static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter) +{ + struct kfd_event *ev = waiter->event; + + /* Only add to the wait list if we actually need to + * wait on this event. + */ + if (!waiter->activated) + add_wait_queue(&ev->wq, &waiter->wait); +} + +/* test_event_condition - Test condition of events being waited for + * @all: Return completion only if all events have signaled + * @num_events: Number of events to wait for + * @event_waiters: Array of event waiters, one per event + * + * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have + * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all) + * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of + * the events have been destroyed. + */ +static uint32_t test_event_condition(bool all, uint32_t num_events, struct kfd_event_waiter *event_waiters) { uint32_t i; uint32_t activated_count = 0; for (i = 0; i < num_events; i++) { + if (!event_waiters[i].event) + return KFD_IOC_WAIT_RESULT_FAIL; + if (event_waiters[i].activated) { if (!all) - return true; + return KFD_IOC_WAIT_RESULT_COMPLETE; activated_count++; } } - return activated_count == num_events; + return activated_count == num_events ? + KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT; } /* * Copy event specific data, if defined. 
* Currently only memory exception events have additional data to copy to user */ -static bool copy_signaled_event_data(uint32_t num_events, +static int copy_signaled_event_data(uint32_t num_events, struct kfd_event_waiter *event_waiters, struct kfd_event_data __user *data) { @@ -673,15 +590,15 @@ static bool copy_signaled_event_data(uint32_t num_events, waiter = &event_waiters[i]; event = waiter->event; if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { - dst = &data[waiter->input_index].memory_exception_data; + dst = &data[i].memory_exception_data; src = &event->memory_exception_data; if (copy_to_user(dst, src, sizeof(struct kfd_hsa_memory_exception_data))) - return false; + return -EFAULT; } } - return true; + return 0; } @@ -710,7 +627,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) uint32_t i; for (i = 0; i < num_events; i++) - list_del(&waiters[i].waiters); + if (waiters[i].event) + remove_wait_queue(&waiters[i].event->wq, + &waiters[i].wait); kfree(waiters); } @@ -718,38 +637,56 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, - enum kfd_event_wait_result *wait_result) + uint32_t *wait_result) { struct kfd_event_data __user *events = (struct kfd_event_data __user *) data; uint32_t i; int ret = 0; + struct kfd_event_waiter *event_waiters = NULL; long timeout = user_timeout_to_jiffies(user_timeout_ms); - mutex_lock(&p->event_mutex); - event_waiters = alloc_event_waiters(num_events); if (!event_waiters) { ret = -ENOMEM; - goto fail; + goto out; } + mutex_lock(&p->event_mutex); + for (i = 0; i < num_events; i++) { struct kfd_event_data event_data; if (copy_from_user(&event_data, &events[i], sizeof(struct kfd_event_data))) { ret = -EFAULT; - goto fail; + goto out_unlock; } - ret = init_event_waiter(p, &event_waiters[i], - event_data.event_id, i); + ret = init_event_waiter_get_status(p, &event_waiters[i], + event_data.event_id); if (ret) - goto fail; + goto out_unlock; } + /* Check condition once. */ + *wait_result = test_event_condition(all, num_events, event_waiters); + if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) { + ret = copy_signaled_event_data(num_events, + event_waiters, events); + goto out_unlock; + } else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) { + /* This should not happen. Events shouldn't be + * destroyed while we're holding the event_mutex + */ + goto out_unlock; + } + + /* Add to wait lists if we need to wait. */ + for (i = 0; i < num_events; i++) + init_event_waiter_add_to_waitlist(&event_waiters[i]); + mutex_unlock(&p->event_mutex); while (true) { @@ -771,62 +708,66 @@ int kfd_wait_on_events(struct kfd_process *p, break; } - if (test_event_condition(all, num_events, event_waiters)) { - if (copy_signaled_event_data(num_events, - event_waiters, events)) - *wait_result = KFD_WAIT_COMPLETE; - else - *wait_result = KFD_WAIT_ERROR; + /* Set task state to interruptible sleep before + * checking wake-up conditions. A concurrent wake-up + * will put the task back into runnable state. In that + * case schedule_timeout will not put the task to + * sleep and we'll get a chance to re-check the + * updated conditions almost immediately. Otherwise, + * this race condition would lead to a soft hang or a + * very long sleep. 
+ */ + set_current_state(TASK_INTERRUPTIBLE); + + *wait_result = test_event_condition(all, num_events, + event_waiters); + if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT) break; - } - if (timeout <= 0) { - *wait_result = KFD_WAIT_TIMEOUT; + if (timeout <= 0) break; - } - timeout = schedule_timeout_interruptible(timeout); + timeout = schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); + /* copy_signaled_event_data may sleep. So this has to happen + * after the task state is set back to RUNNING. + */ + if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) + ret = copy_signaled_event_data(num_events, + event_waiters, events); + mutex_lock(&p->event_mutex); +out_unlock: free_waiters(num_events, event_waiters); mutex_unlock(&p->event_mutex); - - return ret; - -fail: - if (event_waiters) - free_waiters(num_events, event_waiters); - - mutex_unlock(&p->event_mutex); - - *wait_result = KFD_WAIT_ERROR; +out: + if (ret) + *wait_result = KFD_IOC_WAIT_RESULT_FAIL; + else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL) + ret = -EIO; return ret; } int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) { - - unsigned int page_index; unsigned long pfn; - struct signal_page *page; + struct kfd_signal_page *page; + int ret; - /* check required size is logical */ - if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != + /* check required size doesn't exceed the allocated size */ + if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) < get_order(vma->vm_end - vma->vm_start)) { pr_err("Event page mmap requested illegal size\n"); return -EINVAL; } - page_index = vma->vm_pgoff; - - page = lookup_signal_page_by_index(p, page_index); + page = p->signal_page; if (!page) { /* Probably KFD bug, but mmap is user-accessible. */ - pr_debug("Signal page could not be found for page_index %u\n", - page_index); + pr_debug("Signal page could not be found\n"); return -EINVAL; } @@ -847,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) page->user_address = (uint64_t __user *)vma->vm_start; /* mapping the page to user process */ - return remap_pfn_range(vma, vma->vm_start, pfn, + ret = remap_pfn_range(vma, vma->vm_start, pfn, vma->vm_end - vma->vm_start, vma->vm_page_prot); + if (!ret) + p->signal_mapped_size = vma->vm_end - vma->vm_start; + + return ret; } /* @@ -860,12 +805,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, { struct kfd_hsa_memory_exception_data *ev_data; struct kfd_event *ev; - int bkt; + uint32_t id; bool send_signal = true; ev_data = (struct kfd_hsa_memory_exception_data *) event_data; - hash_for_each(p->events, bkt, ev, events) + id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_for_each_entry_continue(&p->event_idr, ev, id) if (ev->type == type) { send_signal = false; dev_dbg(kfd_device, @@ -904,14 +850,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, * running so the lookup function returns a locked process. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct mm_struct *mm; if (!p) return; /* Presumably process exited. */ + /* Take a safe reference to the mm_struct, which may otherwise + * disappear even while the kfd_process is still referenced. 
+ */ + mm = get_task_mm(p->lead_thread); + if (!mm) { + mutex_unlock(&p->mutex); + return; /* Process is exiting */ + } + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); - down_read(&p->mm->mmap_sem); - vma = find_vma(p->mm, address); + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); memory_exception_data.gpu_id = dev->id; memory_exception_data.va = address; @@ -937,7 +893,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, } } - up_read(&p->mm->mmap_sem); + up_read(&mm->mmap_sem); + mmput(mm); mutex_lock(&p->event_mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 28f6838b1f4c..abca5bfebbff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -27,12 +27,17 @@ #include <linux/hashtable.h> #include <linux/types.h> #include <linux/list.h> +#include <linux/wait.h> #include "kfd_priv.h" #include <uapi/linux/kfd_ioctl.h> -#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U -#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK -#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX +/* + * IDR supports non-negative integer IDs. Small IDs are used for + * signal events to match their signal slot. Use the upper half of the + * ID space for non-signal events. + */ +#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1) +#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX /* * Written into kfd_signal_slot_t to indicate that the event is not signaled. @@ -46,9 +51,6 @@ struct kfd_event_waiter; struct signal_page; struct kfd_event { - /* All events in process, rooted at kfd_process.events. */ - struct hlist_node events; - u32 event_id; bool signaled; @@ -56,11 +58,9 @@ struct kfd_event { int type; - struct list_head waiters; /* List of kfd_event_waiter by waiters. */ + wait_queue_head_t wq; /* List of event waiters. */ /* Only for signal events. 
*/ - struct signal_page *signal_page; - unsigned int signal_slot_index; uint64_t __user *user_signal_address; /* type specific data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 70b3a99cffc2..035c351f47c5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -42,26 +42,26 @@ #include <linux/slab.h> #include <linux/device.h> +#include <linux/kfifo.h> #include "kfd_priv.h" -#define KFD_INTERRUPT_RING_SIZE 1024 +#define KFD_IH_NUM_ENTRIES 8192 static void interrupt_wq(struct work_struct *); int kfd_interrupt_init(struct kfd_dev *kfd) { - void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, - kfd->device_info->ih_ring_entry_size, - GFP_KERNEL); - if (!interrupt_ring) - return -ENOMEM; - - kfd->interrupt_ring = interrupt_ring; - kfd->interrupt_ring_size = - KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; - atomic_set(&kfd->interrupt_ring_wptr, 0); - atomic_set(&kfd->interrupt_ring_rptr, 0); + int r; + + r = kfifo_alloc(&kfd->ih_fifo, + KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, + GFP_KERNEL); + if (r) { + dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); + return r; + } + kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); spin_lock_init(&kfd->interrupt_lock); INIT_WORK(&kfd->interrupt_work, interrupt_wq); @@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd) spin_unlock_irqrestore(&kfd->interrupt_lock, flags); /* - * Flush_scheduled_work ensures that there are no outstanding + * flush_work ensures that there are no outstanding * work-queue items that will access interrupt_ring. New work items * can't be created because we stopped interrupt handling above. */ - flush_scheduled_work(); + flush_workqueue(kfd->ih_wq); - kfree(kfd->interrupt_ring); + kfifo_free(&kfd->ih_fifo); } /* - * This assumes that it can't be called concurrently with itself - * but only with dequeue_ih_ring_entry. + * Assumption: single reader/writer. This function is not re-entrant */ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) { - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); + int count; - if ((rptr - wptr) % kfd->interrupt_ring_size == - kfd->device_info->ih_ring_entry_size) { - /* This is very bad, the system is likely to hang. */ + count = kfifo_in(&kfd->ih_fifo, ih_ring_entry, + kfd->device_info->ih_ring_entry_size); + if (count != kfd->device_info->ih_ring_entry_size) { dev_err_ratelimited(kfd_chardev(), - "Interrupt ring overflow, dropping interrupt.\n"); + "Interrupt ring overflow, dropping interrupt %d\n", + count); return false; } - memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); - - wptr = (wptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ - atomic_set(&kfd->interrupt_ring_wptr, wptr); - return true; } /* - * This assumes that it can't be called concurrently with itself - * but only with enqueue_ih_ring_entry. + * Assumption: single reader/writer. This function is not re-entrant */ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) { - /* - * Assume that wait queues have an implicit barrier, i.e. anything that - * happened in the ISR before it queued work is visible. 
- */ - - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); + int count; - if (rptr == wptr) - return false; - - memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, - kfd->device_info->ih_ring_entry_size); - - rptr = (rptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; + count = kfifo_out(&kfd->ih_fifo, ih_ring_entry, + kfd->device_info->ih_ring_entry_size); - /* - * Ensure the rptr write update is not visible until - * memcpy has finished reading. - */ - smp_mb(); - atomic_set(&kfd->interrupt_ring_rptr, rptr); + WARN_ON(count && count != kfd->device_info->ih_ring_entry_size); - return true; + return count == kfd->device_info->ih_ring_entry_size; } static void interrupt_wq(struct work_struct *work) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index ed71ad40e8f7..8b0c0645d7c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -185,7 +185,7 @@ static void uninitialize(struct kernel_queue *kq) kq->mqd->destroy_mqd(kq->mqd, kq->queue->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, - QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, + KFD_UNMAP_LATENCY_MS, kq->queue->pipe, kq->queue->queue); else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) @@ -303,14 +303,20 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_KAVERI: kernel_queue_init_cik(&kq->ops_asic_specific); break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + goto out_free; } - if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) { - pr_err("Failed to init kernel queue\n"); - kfree(kq); - return NULL; - } - return kq; + if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) + return kq; + + pr_err("Failed to init kernel queue\n"); + +out_free: + kfree(kq); + return NULL; } void kernel_queue_uninit(struct kernel_queue *kq) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index b1ef1368c3bb..dfd260ef81ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -31,6 +31,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, return mqd_manager_init_cik(type, dev); case CHIP_CARRIZO: return mqd_manager_init_vi(type, dev); + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); } return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 44ffd23348fc..4859d263fa2a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_pq_control |= NO_UPDATE_RPTR; - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0) { - q->is_active = true; - } + q->queue_percent > 0); return 0; } @@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->sdma_rlc_doorbell = q->doorbell_off << - SDMA0_RLC0_DOORBELL__OFFSET__SHIFT | - 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT; + 
m->sdma_rlc_doorbell = + q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT; m->sdma_rlc_virtual_addr = q->sdma_vm_addr; m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0) { - m->sdma_rlc_rb_cntl |= - 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT; - - q->is_active = true; - } + q->queue_percent > 0); return 0; } @@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | - DOORBELL_OFFSET(q->doorbell_off); + m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); m->cp_hqd_vmid = q->vmid; - m->cp_hqd_active = 0; - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0) { - m->cp_hqd_active = 1; - q->is_active = true; - } + q->queue_percent > 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 73cbfe186dd2..4ea854f9007b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } - q->is_active = false; - if (q->queue_size > 0 && + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && - q->queue_percent > 0) { - q->is_active = true; - } + q->queue_percent > 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 1d312603de9f..16da8ad02d8b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -140,8 +140,6 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd) { struct pm4_mes_map_process *packet; - struct queue *cur; - uint32_t num_queues; packet = (struct pm4_mes_map_process *)buffer; @@ -156,10 +154,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, packet->bitfields10.gds_size = qpd->gds_size; packet->bitfields10.num_gws = qpd->num_gws; packet->bitfields10.num_oac = qpd->num_oac; - num_queues = 0; - list_for_each_entry(cur, &qpd->queues_list, list) - num_queues++; - packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues; + packet->bitfields10.num_queues = (qpd->is_debug) ? 
0 : qpd->queue_count; packet->sh_mem_config = qpd->sh_mem_config; packet->sh_mem_bases = qpd->sh_mem_bases; @@ -208,7 +203,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ break; @@ -376,7 +371,7 @@ int pm_send_set_resources(struct packet_manager *pm, packet->bitfields2.queue_type = queue_type__mes_set_resources__hsa_interface_queue_hiq; packet->bitfields2.vmid_mask = res->vmid_mask; - packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; packet->bitfields7.oac_mask = res->oac_mask; packet->bitfields8.gds_heap_base = res->gds_heap_base; packet->bitfields8.gds_heap_size = res->gds_heap_size; @@ -476,7 +471,7 @@ fail_acquire_packet_buffer: } int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, - enum kfd_preempt_type_filter mode, + enum kfd_unmap_queues_filter filter, uint32_t filter_param, bool reset, unsigned int sdma_engine) { @@ -494,8 +489,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, packet = (struct pm4_mes_unmap_queues *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", - mode, reset, type); + pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n", + filter, reset, type); packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, sizeof(struct pm4_mes_unmap_queues)); switch (type) { @@ -521,29 +516,29 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, packet->bitfields2.action = action__mes_unmap_queues__preempt_queues; - switch (mode) { - case KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__perform_request_on_specified_queues; packet->bitfields2.num_queues = 1; packet->bitfields3b.doorbell_offset0 = filter_param; break; - case KFD_PREEMPT_TYPE_FILTER_BY_PASID: + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; packet->bitfields3a.pasid = filter_param; break; - case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__unmap_all_queues; break; - case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: /* in this case, we do not preempt static queues */ packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__unmap_all_non_static_queues; break; default: - WARN(1, "filter %d", mode); + WARN(1, "filter %d", filter); retval = -EINVAL; goto err_invalid; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 634083e340d1..9e4134c5b481 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -31,8 +31,12 @@ #include <linux/workqueue.h> #include <linux/spinlock.h> #include <linux/kfd_ioctl.h> +#include <linux/idr.h> +#include <linux/kfifo.h> #include <kgd_kfd_interface.h> +#include "amd_shared.h" + #define KFD_SYSFS_FILE_MODE 0444 #define KFD_MMAP_DOORBELL_MASK 0x8000000000000 @@ -112,11 +116,6 @@ enum 
cache_policy { cache_policy_noncoherent }; -enum asic_family_type { - CHIP_KAVERI = 0, - CHIP_CARRIZO -}; - struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); @@ -125,7 +124,7 @@ struct kfd_event_interrupt_class { }; struct kfd_device_info { - unsigned int asic_family; + enum amd_asic_type asic_family; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; @@ -141,6 +140,12 @@ struct kfd_mem_obj { uint32_t *cpu_ptr; }; +struct kfd_vmid_info { + uint32_t first_vmid_kfd; + uint32_t last_vmid_kfd; + uint32_t vmid_num_kfd; +}; + struct kfd_dev { struct kgd_dev *kgd; @@ -162,6 +167,7 @@ struct kfd_dev { */ struct kgd2kfd_shared_resources shared_resources; + struct kfd_vmid_info vm_info; const struct kfd2kgd_calls *kfd2kgd; struct mutex doorbell_mutex; @@ -177,10 +183,8 @@ struct kfd_dev { unsigned int gtt_sa_num_of_chunks; /* Interrupts */ - void *interrupt_ring; - size_t interrupt_ring_size; - atomic_t interrupt_ring_rptr; - atomic_t interrupt_ring_wptr; + struct kfifo ih_fifo; + struct workqueue_struct *ih_wq; struct work_struct interrupt_work; spinlock_t interrupt_lock; @@ -218,22 +222,22 @@ void kfd_chardev_exit(void); struct device *kfd_chardev(void); /** - * enum kfd_preempt_type_filter + * enum kfd_unmap_queues_filter * - * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue. + * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue. * - * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the + * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the * running queues list. * - * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to + * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belongs to * specific process. * */ -enum kfd_preempt_type_filter { - KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, - KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, - KFD_PREEMPT_TYPE_FILTER_BY_PASID +enum kfd_unmap_queues_filter { + KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + KFD_UNMAP_QUEUES_FILTER_BY_PASID }; /** @@ -401,7 +405,6 @@ struct scheduling_resources { struct process_queue_manager { /* data */ struct kfd_process *process; - unsigned int num_concurrent_processes; struct list_head queues; unsigned long *queue_slot_bitmap; }; @@ -417,6 +420,12 @@ struct qcm_process_device { unsigned int queue_count; unsigned int vmid; bool is_debug; + + /* This flag tells if we should reset all wavefronts on + * process termination + */ + bool reset_wavefronts; + /* * All the memory management data should be here too */ @@ -432,6 +441,13 @@ struct qcm_process_device { uint32_t sh_hidden_private_base; }; + +enum kfd_pdd_bound { + PDD_UNBOUND = 0, + PDD_BOUND, + PDD_BOUND_SUSPENDED, +}; + /* Data that is per-process-per device. */ struct kfd_process_device { /* @@ -443,6 +459,8 @@ struct kfd_process_device { /* The device that owns this data. */ struct kfd_dev *dev; + /* The process that owns this kfd_process_device. */ + struct kfd_process *process; /* per-process-per device QCM data structure */ struct qcm_process_device qpd; @@ -456,12 +474,14 @@ struct kfd_process_device { uint64_t scratch_limit; /* Is this process/pasid bound to this device? 
(amd_iommu_bind_pasid) */ - bool bound; + enum kfd_pdd_bound bound; - /* This flag tells if we should reset all - * wavefronts on process termination + /* Flag used to tell the pdd has dequeued from the dqm. + * This is used to prevent dev->dqm->ops.process_termination() from + * being called twice when it is already called in IOMMU callback + * function. */ - bool reset_wavefronts; + bool already_dequeued; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -474,7 +494,12 @@ struct kfd_process { */ struct hlist_node kfd_processes; - struct mm_struct *mm; + /* + * Opaque pointer to mm_struct. We don't hold a reference to + * it so it should never be dereferenced from here. This is + * only used for looking up processes by their mm. + */ + void *mm; struct mutex mutex; @@ -482,6 +507,8 @@ struct kfd_process { * In any process, the thread that started main() is the lead * thread and outlives the rest. * It is here because amd_iommu_bind_pasid wants a task_struct. + * It can also be used for safely getting a reference to the + * mm_struct of the process. */ struct task_struct *lead_thread; @@ -502,22 +529,16 @@ struct kfd_process { struct process_queue_manager pqm; - /* The process's queues. */ - size_t queue_array_size; - - /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ - struct kfd_queue **queues; - /*Is the user space process 32 bit?*/ bool is_32bit_user_mode; /* Event-related data */ struct mutex event_mutex; - /* All events in process hashed by ID, linked on kfd_event.events. */ - DECLARE_HASHTABLE(events, 4); - /* struct slot_page_header.event_pages */ - struct list_head signal_event_pages; - u32 next_nonsignal_event_id; + /* Event ID allocator and lookup */ + struct idr event_idr; + /* Event page */ + struct kfd_signal_page *signal_page; + size_t signal_mapped_size; size_t signal_event_count; bool signal_event_limit_reached; }; @@ -547,8 +568,10 @@ struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, - struct kfd_process *p); -void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); + struct kfd_process *p); +int kfd_bind_processes_to_device(struct kfd_dev *dev); +void kfd_unbind_processes_from_device(struct kfd_dev *dev); +void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p); struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, @@ -646,14 +669,14 @@ struct process_queue_node { struct list_head process_queue_list; }; +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd); +void kfd_process_dequeue_from_all_devices(struct kfd_process *p); int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); void pqm_uninit(struct process_queue_manager *pqm); int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct file *f, struct queue_properties *properties, - unsigned int flags, - enum kfd_queue_type type, unsigned int *qid); int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, @@ -663,15 +686,12 @@ struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, - unsigned long timeout); + 
unsigned int timeout_ms); /* Packet Manager */ -#define KFD_HIQ_TIMEOUT (500) - #define KFD_FENCE_COMPLETED (100) #define KFD_FENCE_INIT (10) -#define KFD_UNMAP_LATENCY (150) struct packet_manager { struct device_queue_manager *dqm; @@ -690,7 +710,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value); int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, - enum kfd_preempt_type_filter mode, + enum kfd_unmap_queues_filter mode, uint32_t filter_param, bool reset, unsigned int sdma_engine); @@ -702,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd); extern const struct kfd_event_interrupt_class event_interrupt_class_cik; extern const struct kfd_device_global_init_class device_global_init_class_cik; -enum kfd_event_wait_result { - KFD_WAIT_COMPLETE, - KFD_WAIT_TIMEOUT, - KFD_WAIT_ERROR -}; - void kfd_event_init_process(struct kfd_process *p); void kfd_event_free_process(struct kfd_process *p); int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, - enum kfd_event_wait_result *wait_result); + uint32_t *wait_result); void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits); void kfd_signal_iommu_event(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9e65ce3c1967..1f5ccd28bd41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,13 +35,6 @@ struct mm_struct; #include "kfd_dbgmgr.h" /* - * Initial size for the array of queues. - * The allocated size is doubled each time - * it is exceeded up to MAX_PROCESS_QUEUES. - */ -#define INITIAL_QUEUE_ARRAY_SIZE 16 - -/* * List of struct kfd_process (field kfd_process). 
* Unique/indexed by mm_struct* */ @@ -171,12 +164,10 @@ static void kfd_process_wq_release(struct work_struct *work) pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", pdd->dev->id, p->pasid); - if (pdd->reset_wavefronts) - dbgdev_wave_reset_wavefronts(pdd->dev, p); + if (pdd->bound == PDD_BOUND) + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); - amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); list_del(&pdd->per_device_list); - kfree(pdd); } @@ -189,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work) mutex_destroy(&p->mutex); - kfree(p->queues); - kfree(p); kfree(work); @@ -202,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) struct kfd_process *p; p = container_of(rcu, struct kfd_process, rcu); - WARN_ON(atomic_read(&p->mm->mm_count) <= 0); mmdrop(p->mm); @@ -236,24 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_lock(&p->mutex); - /* In case our notifier is called before IOMMU notifier */ - pqm_uninit(&p->pqm); - - /* Iterate over all process device data structure and check - * if we should delete debug managers and reset all wavefronts + /* Iterate over all process device data structures and if the + * pdd is in debug mode, we should first force unregistration, + * then we will be able to destroy the queues */ list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - if ((pdd->dev->dbgmgr) && - (pdd->dev->dbgmgr->pasid == p->pasid)) - kfd_dbgmgr_destroy(pdd->dev->dbgmgr); - - if (pdd->reset_wavefronts) { - pr_warn("Resetting all wave fronts\n"); - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; + struct kfd_dev *dev = pdd->dev; + + mutex_lock(kfd_get_dbgmgr_mutex()); + if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } } + mutex_unlock(kfd_get_dbgmgr_mutex()); } + kfd_process_dequeue_from_all_devices(p); + pqm_uninit(&p->pqm); + mutex_unlock(&p->mutex); /* @@ -280,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (!process) goto err_alloc_process; - process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE, - sizeof(process->queues[0]), GFP_KERNEL); - if (!process->queues) - goto err_alloc_queues; - process->pasid = kfd_pasid_alloc(); if (process->pasid == 0) goto err_alloc_pasid; @@ -307,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) process->lead_thread = thread->group_leader; - process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE; - INIT_LIST_HEAD(&process->per_device_data); kfd_event_init_process(process); @@ -337,8 +320,6 @@ err_mmu_notifier: err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: - kfree(process->queues); -err_alloc_queues: kfree(process); err_alloc_process: return ERR_PTR(err); @@ -351,9 +332,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, list_for_each_entry(pdd, &p->per_device_data, per_device_list) if (pdd->dev == dev) - break; + return pdd; - return pdd; + return NULL; } struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, @@ -367,7 +348,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); pdd->qpd.dqm = dev->dqm; - pdd->reset_wavefronts = false; + pdd->process = p; + pdd->bound = PDD_UNBOUND; + pdd->already_dequeued = false; 
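The kfd_get_process_device_data() change in the hunk above is worth calling out: when list_for_each_entry() runs to completion without finding a match, the cursor no longer points at a real entry, so the old pattern of breaking out of the loop and then returning the cursor could hand back a bogus pointer for an unknown device. Returning from inside the loop and falling through to NULL avoids that. A minimal sketch of the same pattern, using hypothetical my_* names rather than the driver's own types:

#include <linux/list.h>

struct my_dev;

struct my_pdd {
	struct my_dev *dev;
	struct list_head per_device_list;	/* linked on my_proc.per_device_data */
};

struct my_proc {
	struct list_head per_device_data;
};

/* Hypothetical lookup helper illustrating the safe pattern: the cursor
 * is only meaningful while the loop body runs, so the match is returned
 * from inside the loop and the not-found case falls through to NULL.
 */
static struct my_pdd *my_find_pdd(struct my_proc *p, struct my_dev *dev)
{
	struct my_pdd *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}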
list_add(&pdd->per_device_list, &p->per_device_data); } @@ -393,19 +376,87 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } - if (pdd->bound) + if (pdd->bound == PDD_BOUND) { return pdd; + } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { + pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); + return ERR_PTR(-EINVAL); + } err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); if (err < 0) return ERR_PTR(err); - pdd->bound = true; + pdd->bound = PDD_BOUND; return pdd; } -void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) +/* + * Bind processes do the device that have been temporarily unbound + * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. + */ +int kfd_bind_processes_to_device(struct kfd_dev *dev) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + int err = 0; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(dev, p); + if (pdd->bound != PDD_BOUND_SUSPENDED) { + mutex_unlock(&p->mutex); + continue; + } + + err = amd_iommu_bind_pasid(dev->pdev, p->pasid, + p->lead_thread); + if (err < 0) { + pr_err("Unexpected pasid %d binding failure\n", + p->pasid); + mutex_unlock(&p->mutex); + break; + } + + pdd->bound = PDD_BOUND; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return err; +} + +/* + * Mark currently bound processes as PDD_BOUND_SUSPENDED. These + * processes will be restored to PDD_BOUND state in + * kfd_bind_processes_to_device. + */ +void kfd_unbind_processes_from_device(struct kfd_dev *dev) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(dev, p); + + if (pdd->bound == PDD_BOUND) + pdd->bound = PDD_BOUND_SUSPENDED; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); +} + +void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) { struct kfd_process *p; struct kfd_process_device *pdd; @@ -421,31 +472,23 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pr_debug("Unbinding process %d from IOMMU\n", pasid); - if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) - kfd_dbgmgr_destroy(dev->dbgmgr); - - pqm_uninit(&p->pqm); - - pdd = kfd_get_process_device_data(dev, p); + mutex_lock(kfd_get_dbgmgr_mutex()); - if (!pdd) { - mutex_unlock(&p->mutex); - return; + if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } } - if (pdd->reset_wavefronts) { - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } + mutex_unlock(kfd_get_dbgmgr_mutex()); - /* - * Just mark pdd as unbound, because we still need it - * to call amd_iommu_unbind_pasid() in when the - * process exits. - * We don't call amd_iommu_unbind_pasid() here - * because the IOMMU called us. - */ - pdd->bound = false; + pdd = kfd_get_process_device_data(dev, p); + if (pdd) + /* For GPU relying on IOMMU, we need to dequeue here + * when PASID is still bound. 
+ */ + kfd_process_dequeue_from_device(pdd); mutex_unlock(&p->mutex); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 03bec765b03d..2bec902fc939 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -63,6 +63,25 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, return 0; } +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + + if (pdd->already_dequeued) + return; + + dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); + pdd->already_dequeued = true; +} + +void kfd_process_dequeue_from_all_devices(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + kfd_process_dequeue_from_device(pdd); +} + int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); @@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) void pqm_uninit(struct process_queue_manager *pqm) { - int retval; struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - retval = pqm_destroy_queue( - pqm, - (pqn->q != NULL) ? - pqn->q->properties.queue_id : - pqn->kq->queue->properties.queue_id); - - if (retval != 0) { - pr_err("failed to destroy queue\n"); - return; - } + uninit_queue(pqn->q); + list_del(&pqn->process_queue_list); + kfree(pqn); } + kfree(pqm->queue_slot_bitmap); pqm->queue_slot_bitmap = NULL; } @@ -130,20 +142,16 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct file *f, struct queue_properties *properties, - unsigned int flags, - enum kfd_queue_type type, unsigned int *qid) { int retval; struct kfd_process_device *pdd; - struct queue_properties q_properties; struct queue *q; struct process_queue_node *pqn; struct kernel_queue *kq; - int num_queues = 0; - struct queue *cur; + enum kfd_queue_type type = properties->type; + unsigned int max_queues = 127; /* HWS limit */ - memcpy(&q_properties, properties, sizeof(struct queue_properties)); q = NULL; kq = NULL; @@ -159,19 +167,18 @@ int pqm_create_queue(struct process_queue_manager *pqm, * If we are just about to create DIQ, the is_debug flag is not set yet * Hence we also check the type as well */ - if ((pdd->qpd.is_debug) || - (type == KFD_QUEUE_TYPE_DIQ)) { - list_for_each_entry(cur, &pdd->qpd.queues_list, list) - num_queues++; - if (num_queues >= dev->device_info->max_no_of_hqd/2) - return -ENOSPC; - } + if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) + max_queues = dev->device_info->max_no_of_hqd/2; + + if (pdd->qpd.queue_count >= max_queues) + return -ENOSPC; retval = find_available_queue_slot(pqm, qid); if (retval != 0) return retval; - if (list_empty(&pqm->queues)) { + if (list_empty(&pdd->qpd.queues_list) && + list_empty(&pdd->qpd.priv_queue_list)) { pdd->qpd.pqm = pqm; dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); } @@ -187,14 +194,14 @@ int pqm_create_queue(struct process_queue_manager *pqm, case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && - ((dev->dqm->processes_count >= VMID_PER_DEVICE) || + ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { pr_err("Over-subscription is not allowed in 
radeon_kfd.sched_policy == 1\n"); retval = -EPERM; goto err_create_queue; } - retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid); + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -231,9 +238,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, list_add(&pqn->process_queue_list, &pqm->queues); if (q) { - *properties = q->properties; pr_debug("PQM done creating queue\n"); - print_queue_properties(properties); + print_queue_properties(&q->properties); } return retval; @@ -243,7 +249,8 @@ err_create_queue: err_allocate_pqn: /* check if queues list is empty unregister process from device */ clear_bit(*qid, pqm->queue_slot_bitmap); - if (list_empty(&pqm->queues)) + if (list_empty(&pdd->qpd.queues_list) && + list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); return retval; } @@ -290,9 +297,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->q) { dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); - if (retval != 0) - return retval; - uninit_queue(pqn->q); } @@ -300,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) kfree(pqn); clear_bit(qid, pqm->queue_slot_bitmap); - if (list_empty(&pqm->queues)) + if (list_empty(&pdd->qpd.queues_list) && + list_empty(&pdd->qpd.priv_queue_list)) dqm->ops.unregister_process(dqm, &pdd->qpd); return retval; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index de6fc2731b98..b72f8a43d86b 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -23,36 +23,11 @@ #ifndef __AMD_SHARED_H__ #define __AMD_SHARED_H__ -#define AMD_MAX_USEC_TIMEOUT 200000 /* 200 ms */ +#include <drm/amd_asic_type.h> struct seq_file; -/* - * Supported ASIC types - */ -enum amd_asic_type { - CHIP_TAHITI = 0, - CHIP_PITCAIRN, - CHIP_VERDE, - CHIP_OLAND, - CHIP_HAINAN, - CHIP_BONAIRE, - CHIP_KAVERI, - CHIP_KABINI, - CHIP_HAWAII, - CHIP_MULLINS, - CHIP_TOPAZ, - CHIP_TONGA, - CHIP_FIJI, - CHIP_CARRIZO, - CHIP_STONEY, - CHIP_POLARIS10, - CHIP_POLARIS11, - CHIP_POLARIS12, - CHIP_VEGA10, - CHIP_RAVEN, - CHIP_LAST, -}; +#define AMD_MAX_USEC_TIMEOUT 200000 /* 200 ms */ /* * Chip flags diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile index 68b417ac94dd..8c55c6e254d9 100644 --- a/drivers/gpu/drm/amd/powerplay/Makefile +++ b/drivers/gpu/drm/amd/powerplay/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 subdir-ccflags-y += \ -I$(FULL_AMD_PATH)/powerplay/inc/ \ diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 3c8ef4bfc205..c7e34128cbde 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -78,6 +78,9 @@ static int amd_powerplay_destroy(void *handle) { struct pp_instance *instance = (struct pp_instance *)handle; + kfree(instance->hwmgr->hardcode_pp_table); + instance->hwmgr->hardcode_pp_table = NULL; + kfree(instance->hwmgr); instance->hwmgr = NULL; @@ -1184,7 +1187,7 @@ int amd_powerplay_reset(void *handle) int ret; ret = pp_check(instance); - if (!ret) + if (ret) return ret; ret = pp_hw_fini(instance); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index dc4bbcfe1243..824fb6fe54ae 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ 
b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the 'hw manager' sub-component of powerplay. # It provides the hardware management services for the driver. diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index 189f3b54a385..ad1f6b57884b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c @@ -961,18 +961,13 @@ static void cz_clear_voting_clients(struct pp_hwmgr *hwmgr) static int cz_start_dpm(struct pp_hwmgr *hwmgr) { - int ret = 0; struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); - unsigned long dpm_features = 0; cz_hwmgr->dpm_flags |= DPMFlags_SCLK_Enabled; - dpm_features |= SCLK_DPM_MASK; - ret = smum_send_msg_to_smc_with_parameter(hwmgr, + return smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnableAllSmuFeatures, - dpm_features); - - return ret; + SCLK_DPM_MASK); } static int cz_stop_dpm(struct pp_hwmgr *hwmgr) @@ -1279,27 +1274,18 @@ static int cz_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, int cz_dpm_powerdown_uvd(struct pp_hwmgr *hwmgr) { - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_UVDPowerGating)) - return smum_send_msg_to_smc(hwmgr, - PPSMC_MSG_UVDPowerOFF); + if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) + return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UVDPowerOFF); return 0; } int cz_dpm_powerup_uvd(struct pp_hwmgr *hwmgr) { - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_UVDPowerGating)) { - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_UVDDynamicPowerGating)) { - return smum_send_msg_to_smc_with_parameter( - hwmgr, - PPSMC_MSG_UVDPowerON, 1); - } else { - return smum_send_msg_to_smc_with_parameter( - hwmgr, - PPSMC_MSG_UVDPowerON, 0); - } + if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) { + return smum_send_msg_to_smc_with_parameter( + hwmgr, + PPSMC_MSG_UVDPowerON, + PP_CAP(PHM_PlatformCaps_UVDDynamicPowerGating) ? 
1 : 0); } return 0; @@ -1313,17 +1299,16 @@ int cz_dpm_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate) if (!bgate) { /* Stable Pstate is enabled and we need to set the UVD DPM to highest level */ - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState) - || hwmgr->en_umd_pstate) { + if (PP_CAP(PHM_PlatformCaps_StablePState) || + hwmgr->en_umd_pstate) { cz_hwmgr->uvd_dpm.hard_min_clk = ptable->entries[ptable->count - 1].vclk; smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetUvdHardMin, - cz_get_uvd_level(hwmgr, - cz_hwmgr->uvd_dpm.hard_min_clk, - PPSMC_MSG_SetUvdHardMin)); + PPSMC_MSG_SetUvdHardMin, + cz_get_uvd_level(hwmgr, + cz_hwmgr->uvd_dpm.hard_min_clk, + PPSMC_MSG_SetUvdHardMin)); cz_enable_disable_uvd_dpm(hwmgr, true); } else { @@ -1343,17 +1328,16 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr) hwmgr->dyn_state.vce_clock_voltage_dependency_table; /* Stable Pstate is enabled and we need to set the VCE DPM to highest level */ - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState) - || hwmgr->en_umd_pstate) { + if (PP_CAP(PHM_PlatformCaps_StablePState) || + hwmgr->en_umd_pstate) { cz_hwmgr->vce_dpm.hard_min_clk = ptable->entries[ptable->count - 1].ecclk; smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetEclkHardMin, - cz_get_eclk_level(hwmgr, - cz_hwmgr->vce_dpm.hard_min_clk, - PPSMC_MSG_SetEclkHardMin)); + PPSMC_MSG_SetEclkHardMin, + cz_get_eclk_level(hwmgr, + cz_hwmgr->vce_dpm.hard_min_clk, + PPSMC_MSG_SetEclkHardMin)); } else { /*Program HardMin based on the vce_arbiter.ecclk */ if (hwmgr->vce_arbiter.ecclk == 0) { @@ -1366,10 +1350,10 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr) } else { cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk; smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetEclkHardMin, - cz_get_eclk_level(hwmgr, - cz_hwmgr->vce_dpm.hard_min_clk, - PPSMC_MSG_SetEclkHardMin)); + PPSMC_MSG_SetEclkHardMin, + cz_get_eclk_level(hwmgr, + cz_hwmgr->vce_dpm.hard_min_clk, + PPSMC_MSG_SetEclkHardMin)); } } return 0; @@ -1377,8 +1361,7 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr) int cz_dpm_powerdown_vce(struct pp_hwmgr *hwmgr) { - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_VCEPowerGating)) + if (PP_CAP(PHM_PlatformCaps_VCEPowerGating)) return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_VCEPowerOFF); return 0; @@ -1386,8 +1369,7 @@ int cz_dpm_powerdown_vce(struct pp_hwmgr *hwmgr) int cz_dpm_powerup_vce(struct pp_hwmgr *hwmgr) { - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_VCEPowerGating)) + if (PP_CAP(PHM_PlatformCaps_VCEPowerGating)) return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_VCEPowerON); return 0; @@ -1871,6 +1853,33 @@ static int cz_read_sensor(struct pp_hwmgr *hwmgr, int idx, } } +static int cz_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, + uint32_t virtual_addr_low, + uint32_t virtual_addr_hi, + uint32_t mc_addr_low, + uint32_t mc_addr_hi, + uint32_t size) +{ + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramAddrHiVirtual, + mc_addr_hi); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramAddrLoVirtual, + mc_addr_low); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramAddrHiPhysical, + virtual_addr_hi); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramAddrLoPhysical, + virtual_addr_low); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramBufferSize, + size); + return 0; +} + + static const struct pp_hwmgr_func 
cz_hwmgr_funcs = { .backend_init = cz_hwmgr_backend_init, .backend_fini = cz_hwmgr_backend_fini, @@ -1894,12 +1903,14 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = { .get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks, .get_clock_by_type = cz_get_clock_by_type, .get_max_high_clocks = cz_get_max_high_clocks, + .get_temperature = cz_thermal_get_temperature, .read_sensor = cz_read_sensor, .power_off_asic = cz_power_off_asic, .asic_setup = cz_setup_asic_task, .dynamic_state_management_enable = cz_enable_dpm_tasks, .power_state_set = cz_set_power_state_tasks, .dynamic_state_management_disable = cz_disable_dpm_tasks, + .notify_cac_buffer_info = cz_notify_cac_buffer_info, }; int cz_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c index 8ba75d43fba6..67fae834bc67 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "pp_overdriver.h" #include <linux/errno.h> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c index 938010842c7d..3e0b267c74a8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c @@ -672,36 +672,20 @@ static int rv_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_p PHM_PerformanceLevelDesignation designation, uint32_t index, PHM_PerformanceLevel *level) { - const struct rv_power_state *ps; struct rv_hwmgr *data; - uint32_t level_index; - uint32_t i; - uint32_t vol_dep_record_index = 0; if (level == NULL || hwmgr == NULL || state == NULL) return -EINVAL; data = (struct rv_hwmgr *)(hwmgr->backend); - ps = cast_const_rv_ps(state); - - level_index = index > ps->level - 1 ? 
ps->level - 1 : index; - level->coreClock = 30000; - if (designation == PHM_PerformanceLevelDesignation_PowerContainment) { - for (i = 1; i < ps->level; i++) { - if (ps->levels[i].engine_clock > data->dce_slow_sclk_threshold) { - level->coreClock = 30000; - break; - } - } - } - - if (level_index == 0) { - vol_dep_record_index = data->clock_vol_info.vdd_dep_on_fclk->count - 1; - level->memory_clock = - data->clock_vol_info.vdd_dep_on_fclk->entries[vol_dep_record_index].clk; - } else { + if (index == 0) { level->memory_clock = data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk; + level->coreClock = data->gfx_min_freq_limit; + } else { + level->memory_clock = data->clock_vol_info.vdd_dep_on_fclk->entries[ + data->clock_vol_info.vdd_dep_on_fclk->count - 1].clk; + level->coreClock = data->gfx_max_freq_limit; } level->nonLocalMemoryFreq = 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 4826b2991b7e..4466469cf8ab 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -815,7 +815,7 @@ uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr) { uint32_t reference_clock, tmp; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; @@ -3948,10 +3948,9 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) uint32_t ref_clock; uint32_t refresh_rate = 0; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; - cgs_get_active_displays_info(hwmgr->device, &info); num_active_displays = info.display_count; @@ -3967,6 +3966,7 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) frame_time_in_us = 1000000 / refresh_rate; pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + data->frame_time_x2 = frame_time_in_us * 2 / 100; display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); @@ -4645,6 +4645,47 @@ static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable) return 0; } +static int smu7_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, + uint32_t virtual_addr_low, + uint32_t virtual_addr_hi, + uint32_t mc_addr_low, + uint32_t mc_addr_hi, + uint32_t size) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + data->soft_regs_start + + smum_get_offsetof(hwmgr, + SMU_SoftRegisters, DRAM_LOG_ADDR_H), + mc_addr_hi); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + data->soft_regs_start + + smum_get_offsetof(hwmgr, + SMU_SoftRegisters, DRAM_LOG_ADDR_L), + mc_addr_low); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + data->soft_regs_start + + smum_get_offsetof(hwmgr, + SMU_SoftRegisters, DRAM_LOG_PHY_ADDR_H), + virtual_addr_hi); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + data->soft_regs_start + + smum_get_offsetof(hwmgr, + SMU_SoftRegisters, DRAM_LOG_PHY_ADDR_L), + virtual_addr_low); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + data->soft_regs_start + + smum_get_offsetof(hwmgr, + SMU_SoftRegisters, DRAM_LOG_BUFF_SIZE), + size); + return 0; +} + static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .backend_init = &smu7_hwmgr_backend_init, .backend_fini = &smu7_hwmgr_backend_fini, @@ -4696,6 +4737,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .avfs_control = smu7_avfs_control, .disable_smc_firmware_ctf = smu7_thermal_disable_alert, 
.start_thermal_controller = smu7_start_thermal_controller, + .notify_cac_buffer_info = smu7_notify_cac_buffer_info, }; uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 48de45ec0eaf..4f79c21f27ed 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -1161,6 +1161,8 @@ static void vega10_setup_default_single_dpm_table(struct pp_hwmgr *hwmgr, { int i; + dpm_table->count = 0; + for (i = 0; i < dep_table->count; i++) { if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value <= dep_table->entries[i].clk) { @@ -1269,10 +1271,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); /* Initialize Sclk DPM table based on allow Sclk values */ - data->dpm_table.soc_table.count = 0; - data->dpm_table.gfx_table.count = 0; - data->dpm_table.dcef_table.count = 0; - dpm_table = &(data->dpm_table.soc_table); vega10_setup_default_single_dpm_table(hwmgr, dpm_table, @@ -1809,6 +1807,10 @@ static int vega10_populate_all_memory_levels(struct pp_hwmgr *hwmgr) mem_channels = (cgs_read_register(hwmgr->device, reg) & DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK) >> DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + PP_ASSERT_WITH_CODE(mem_channels < ARRAY_SIZE(channel_number), + "Mem Channel Index Exceeded maximum!", + return -1); + pp_table->NumMemoryChannels = cpu_to_le16(mem_channels); pp_table->MemoryChannelWidth = cpu_to_le16(HBM_MEMORY_CHANNEL_WIDTH * @@ -2881,6 +2883,15 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "DPM is already running right , skipping re-enablement!", return 0); + if ((data->smu_version == 0x001c2c00) || + (data->smu_version == 0x001c2d00)) { + tmp_result = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_UpdatePkgPwrPidAlpha, 1); + PP_ASSERT_WITH_CODE(!tmp_result, + "Failed to set package power PID!", + return tmp_result); + } + tmp_result = vega10_construct_voltage_tables(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, "Failed to contruct voltage tables!", @@ -3127,6 +3138,8 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; if (PP_CAP(PHM_PlatformCaps_StablePState)) { + stable_pstate_sclk_dpm_percentage = + data->registry_data.stable_pstate_sclk_dpm_percentage; PP_ASSERT_WITH_CODE( data->registry_data.stable_pstate_sclk_dpm_percentage >= 1 && data->registry_data.stable_pstate_sclk_dpm_percentage <= 100, @@ -4227,7 +4240,7 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) vega10_fan_ctrl_stop_smc_fan_control(hwmgr); break; case AMD_FAN_CTRL_AUTO: - if (!vega10_fan_ctrl_set_static_mode(hwmgr, mode)) + if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) vega10_fan_ctrl_start_smc_fan_control(hwmgr); break; default: @@ -4994,6 +5007,33 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value) return 0; } +static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, + uint32_t virtual_addr_low, + uint32_t virtual_addr_hi, + uint32_t mc_addr_low, + uint32_t mc_addr_hi, + uint32_t size) +{ + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSystemVirtualDramAddrHigh, + virtual_addr_hi); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSystemVirtualDramAddrLow, + virtual_addr_low); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramLogSetDramAddrHigh, + mc_addr_hi); + + 
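/*
 * Editorial sketch, not part of the diff: the cz, smu7 and vega10 back-ends
 * in this series each gain a notify_cac_buffer_info() hook with the same
 * signature, so generic powerplay code can hand the SMU the CAC/DRAM-log
 * buffer addresses without knowing the ASIC.  A minimal caller is sketched
 * below; the wrapper name pp_notify_smu_memory_info is an assumption for
 * illustration only and is not taken from this diff.
 */
static int pp_notify_smu_memory_info(struct pp_hwmgr *hwmgr,
				     uint32_t virtual_addr_low,
				     uint32_t virtual_addr_hi,
				     uint32_t mc_addr_low,
				     uint32_t mc_addr_hi,
				     uint32_t size)
{
	/* Back-ends that do not implement the hook simply opt out. */
	if (!hwmgr || !hwmgr->hwmgr_func ||
	    !hwmgr->hwmgr_func->notify_cac_buffer_info)
		return -EINVAL;

	/* Dispatch to the ASIC-specific implementation added in this diff. */
	return hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr,
			virtual_addr_low, virtual_addr_hi,
			mc_addr_low, mc_addr_hi, size);
}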
smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramLogSetDramAddrLow, + mc_addr_low); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_DramLogSetDramSize, + size); + return 0; +} + static int vega10_register_thermal_interrupt(struct pp_hwmgr *hwmgr, const void *info) { @@ -5079,7 +5119,9 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_mclk_od = vega10_get_mclk_od, .set_mclk_od = vega10_set_mclk_od, .avfs_control = vega10_avfs_enable, + .notify_cac_buffer_info = vega10_notify_cac_buffer_info, .register_internal_thermal_interrupt = vega10_register_thermal_interrupt, + .start_thermal_controller = vega10_start_thermal_controller, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index e343df190375..f14c7611fad3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -291,8 +291,7 @@ static int get_mm_clock_voltage_table( table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_mm_clock_voltage_dependency_record) * mm_dependency_table->ucNumEntries; - mm_table = (phm_ppt_v1_mm_clock_voltage_dependency_table *) - kzalloc(table_size, GFP_KERNEL); + mm_table = kzalloc(table_size, GFP_KERNEL); if (!mm_table) return -ENOMEM; @@ -519,8 +518,7 @@ static int get_socclk_voltage_dependency_table( sizeof(phm_ppt_v1_clock_voltage_dependency_record) * clk_dep_table->ucNumEntries; - clk_table = (phm_ppt_v1_clock_voltage_dependency_table *) - kzalloc(table_size, GFP_KERNEL); + clk_table = kzalloc(table_size, GFP_KERNEL); if (!clk_table) return -ENOMEM; @@ -554,8 +552,7 @@ static int get_mclk_voltage_dependency_table( sizeof(phm_ppt_v1_clock_voltage_dependency_record) * mclk_dep_table->ucNumEntries; - mclk_table = (phm_ppt_v1_clock_voltage_dependency_table *) - kzalloc(table_size, GFP_KERNEL); + mclk_table = kzalloc(table_size, GFP_KERNEL); if (!mclk_table) return -ENOMEM; @@ -596,8 +593,7 @@ static int get_gfxclk_voltage_dependency_table( sizeof(phm_ppt_v1_clock_voltage_dependency_record) * clk_dep_table->ucNumEntries; - clk_table = (struct phm_ppt_v1_clock_voltage_dependency_table *) - kzalloc(table_size, GFP_KERNEL); + clk_table = kzalloc(table_size, GFP_KERNEL); if (!clk_table) return -ENOMEM; @@ -663,8 +659,7 @@ static int get_pix_clk_voltage_dependency_table( sizeof(phm_ppt_v1_clock_voltage_dependency_record) * clk_dep_table->ucNumEntries; - clk_table = (struct phm_ppt_v1_clock_voltage_dependency_table *) - kzalloc(table_size, GFP_KERNEL); + clk_table = kzalloc(table_size, GFP_KERNEL); if (!clk_table) return -ENOMEM; @@ -728,8 +723,7 @@ static int get_dcefclk_voltage_dependency_table( sizeof(phm_ppt_v1_clock_voltage_dependency_record) * num_entries; - clk_table = (struct phm_ppt_v1_clock_voltage_dependency_table *) - kzalloc(table_size, GFP_KERNEL); + clk_table = kzalloc(table_size, GFP_KERNEL); if (!clk_table) return -ENOMEM; @@ -772,8 +766,7 @@ static int get_pcie_table(struct pp_hwmgr *hwmgr, sizeof(struct phm_ppt_v1_pcie_record) * atom_pcie_table->ucNumEntries; - pcie_table = (struct phm_ppt_v1_pcie_table *) - kzalloc(table_size, GFP_KERNEL); + pcie_table = kzalloc(table_size, GFP_KERNEL); if (!pcie_table) return -ENOMEM; @@ -1026,10 +1019,9 @@ static int get_vddc_lookup_table( table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_voltage_lookup_record) * max_levels; - table = (phm_ppt_v1_voltage_lookup_table *) - kzalloc(table_size, GFP_KERNEL); + table = 
kzalloc(table_size, GFP_KERNEL); - if (NULL == table) + if (table == NULL) return -ENOMEM; table->count = vddc_lookup_pp_tables->ucNumEntries; @@ -1138,12 +1130,12 @@ int vega10_pp_tables_initialize(struct pp_hwmgr *hwmgr) hwmgr->pptable = kzalloc(sizeof(struct phm_ppt_v2_information), GFP_KERNEL); - PP_ASSERT_WITH_CODE((NULL != hwmgr->pptable), + PP_ASSERT_WITH_CODE((hwmgr->pptable != NULL), "Failed to allocate hwmgr->pptable!", return -ENOMEM); powerplay_table = get_powerplay_table(hwmgr); - PP_ASSERT_WITH_CODE((NULL != powerplay_table), + PP_ASSERT_WITH_CODE((powerplay_table != NULL), "Missing PowerPlay Table!", return -1); result = check_powerplay_tables(hwmgr, powerplay_table); @@ -1182,7 +1174,6 @@ int vega10_pp_tables_initialize(struct pp_hwmgr *hwmgr) static int vega10_pp_tables_uninitialize(struct pp_hwmgr *hwmgr) { - int result = 0; struct phm_ppt_v2_information *pp_table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); @@ -1225,7 +1216,7 @@ static int vega10_pp_tables_uninitialize(struct pp_hwmgr *hwmgr) kfree(hwmgr->pptable); hwmgr->pptable = NULL; - return result; + return 0; } const struct pp_table_func vega10_pptable_funcs = { @@ -1238,7 +1229,7 @@ int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr) const ATOM_Vega10_State_Array *state_arrays; const ATOM_Vega10_POWERPLAYTABLE *pp_table = get_powerplay_table(hwmgr); - PP_ASSERT_WITH_CODE((NULL != pp_table), + PP_ASSERT_WITH_CODE((pp_table != NULL), "Missing PowerPlay Table!", return -1); PP_ASSERT_WITH_CODE((pp_table->sHeader.format_revision >= ATOM_Vega10_TABLE_REVISION_VEGA10), diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index 1feefac49ea9..dc3761bcb9b6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -365,8 +365,8 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr) temp = cgs_read_register(hwmgr->device, reg); - temp = (temp & CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK) >> - CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT; + temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; temp = temp & 0x1ff; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h index f34ce04cfd89..82f10bdd5f07 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h @@ -71,7 +71,8 @@ extern int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, extern int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr); extern int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr); extern int vega10_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr); - +extern int vega10_start_thermal_controller(struct pp_hwmgr *hwmgr, + struct PP_TemperatureRange *range); extern uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr); #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h b/drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h deleted file mode 100644 index 9d391f0eca94..000000000000 --- a/drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h +++ /dev/null @@ -1,2007 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#ifndef _FIJI_PWRVIRUS_H_ -#define _FIJI_PWRVIRUS_H_ - -#define mmCP_HYP_MEC1_UCODE_ADDR 0xf81a -#define mmCP_HYP_MEC1_UCODE_DATA 0xf81b -#define mmCP_HYP_MEC2_UCODE_ADDR 0xf81c -#define mmCP_HYP_MEC2_UCODE_DATA 0xf81d - -struct PWR_Command_Table -{ - uint32_t data; - uint32_t reg; -}; -typedef struct PWR_Command_Table PWR_Command_Table; - -struct PWR_DFY_Section { - uint32_t dfy_cntl; - uint32_t dfy_addr_hi, dfy_addr_lo; - uint32_t dfy_size; - uint32_t dfy_data[]; -}; - -typedef struct PWR_DFY_Section PWR_DFY_Section; - -static const PWR_Command_Table PwrVirusTable_pre[] = -{ - { 0x100100b6, mmPCIE_INDEX }, - { 0x00000000, mmPCIE_DATA }, - { 0x100100b6, mmPCIE_INDEX }, - { 0x0300078c, mmPCIE_DATA }, - { 0x00000000, mmBIF_CLK_CTRL }, - { 0x00000001, mmBIF_CLK_CTRL }, - { 0x00000000, mmBIF_CLK_CTRL }, - { 0x00000003, mmBIF_FB_EN }, - { 0x00000000, mmBIF_FB_EN }, - { 0x00000001, mmBIF_DOORBELL_APER_EN }, - { 0x00000000, mmBIF_DOORBELL_APER_EN }, - { 0x014000c0, mmPCIE_INDEX }, - { 0x00000000, mmPCIE_DATA }, - { 0x014000c0, mmPCIE_INDEX }, - { 0x22000000, mmPCIE_DATA }, - { 0x014000c0, mmPCIE_INDEX }, - { 0x00000000, mmPCIE_DATA }, - /* - { 0x009f0090, mmMC_VM_FB_LOCATION }, - { 0x00000000, mmMC_CITF_CNTL }, - { 0x00000000, mmMC_VM_FB_LOCATION }, - { 0x009f0090, mmMC_VM_FB_LOCATION }, - { 0x00000000, mmMC_VM_FB_LOCATION }, - { 0x009f0090, mmMC_VM_FB_LOCATION }, - { 0x00000000, mmMC_VM_FB_OFFSET },*/ - { 0x00000000, mmRLC_CSIB_ADDR_LO }, - { 0x00000000, mmRLC_CSIB_ADDR_HI }, - { 0x00000000, mmRLC_CSIB_LENGTH }, - /* - { 0x00000000, mmMC_VM_MX_L1_TLB_CNTL }, - { 0x00000001, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR }, - { 0x00000000, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR }, - { 0x00000000, mmMC_VM_FB_LOCATION }, - { 0x009f0090, mmMC_VM_FB_LOCATION },*/ - { 0x00000000, mmVM_CONTEXT0_CNTL }, - { 0x00000000, mmVM_CONTEXT1_CNTL }, - /* - { 0x00000000, mmMC_VM_AGP_BASE }, - { 0x00000002, mmMC_VM_AGP_BOT }, - { 0x00000000, mmMC_VM_AGP_TOP },*/ - { 0x04000000, mmATC_VM_APERTURE0_LOW_ADDR }, - { 0x0400ff20, mmATC_VM_APERTURE0_HIGH_ADDR }, - { 0x00000002, mmATC_VM_APERTURE0_CNTL }, - { 0x0000ffff, mmATC_VM_APERTURE0_CNTL2 }, - { 0x00000001, mmATC_VM_APERTURE1_LOW_ADDR }, - { 0x00000000, mmATC_VM_APERTURE1_HIGH_ADDR }, - { 0x00000000, mmATC_VM_APERTURE1_CNTL }, - { 0x00000000, mmATC_VM_APERTURE1_CNTL2 }, - //{ 0x00000000, mmMC_ARB_RAMCFG }, - { 0x12011003, mmGB_ADDR_CONFIG }, - { 0x00800010, mmGB_TILE_MODE0 }, - { 0x00800810, 
mmGB_TILE_MODE1 }, - { 0x00801010, mmGB_TILE_MODE2 }, - { 0x00801810, mmGB_TILE_MODE3 }, - { 0x00802810, mmGB_TILE_MODE4 }, - { 0x00802808, mmGB_TILE_MODE5 }, - { 0x00802814, mmGB_TILE_MODE6 }, - { 0x00000000, mmGB_TILE_MODE7 }, - { 0x00000004, mmGB_TILE_MODE8 }, - { 0x02000008, mmGB_TILE_MODE9 }, - { 0x02000010, mmGB_TILE_MODE10 }, - { 0x06000014, mmGB_TILE_MODE11 }, - { 0x00000000, mmGB_TILE_MODE12 }, - { 0x02400008, mmGB_TILE_MODE13 }, - { 0x02400010, mmGB_TILE_MODE14 }, - { 0x02400030, mmGB_TILE_MODE15 }, - { 0x06400014, mmGB_TILE_MODE16 }, - { 0x00000000, mmGB_TILE_MODE17 }, - { 0x0040000c, mmGB_TILE_MODE18 }, - { 0x0100000c, mmGB_TILE_MODE19 }, - { 0x0100001c, mmGB_TILE_MODE20 }, - { 0x01000034, mmGB_TILE_MODE21 }, - { 0x01000024, mmGB_TILE_MODE22 }, - { 0x00000000, mmGB_TILE_MODE23 }, - { 0x0040001c, mmGB_TILE_MODE24 }, - { 0x01000020, mmGB_TILE_MODE25 }, - { 0x01000038, mmGB_TILE_MODE26 }, - { 0x02c00008, mmGB_TILE_MODE27 }, - { 0x02c00010, mmGB_TILE_MODE28 }, - { 0x06c00014, mmGB_TILE_MODE29 }, - { 0x00000000, mmGB_TILE_MODE30 }, - { 0x00000000, mmGB_TILE_MODE31 }, - { 0x000000a8, mmGB_MACROTILE_MODE0 }, - { 0x000000a4, mmGB_MACROTILE_MODE1 }, - { 0x00000090, mmGB_MACROTILE_MODE2 }, - { 0x00000090, mmGB_MACROTILE_MODE3 }, - { 0x00000090, mmGB_MACROTILE_MODE4 }, - { 0x00000090, mmGB_MACROTILE_MODE5 }, - { 0x00000090, mmGB_MACROTILE_MODE6 }, - { 0x00000000, mmGB_MACROTILE_MODE7 }, - { 0x000000ee, mmGB_MACROTILE_MODE8 }, - { 0x000000ea, mmGB_MACROTILE_MODE9 }, - { 0x000000e9, mmGB_MACROTILE_MODE10 }, - { 0x000000e5, mmGB_MACROTILE_MODE11 }, - { 0x000000e4, mmGB_MACROTILE_MODE12 }, - { 0x000000e0, mmGB_MACROTILE_MODE13 }, - { 0x00000090, mmGB_MACROTILE_MODE14 }, - { 0x00000000, mmGB_MACROTILE_MODE15 }, - { 0x00900000, mmHDP_NONSURFACE_BASE }, - { 0x00008000, mmHDP_NONSURFACE_INFO }, - { 0x3fffffff, mmHDP_NONSURFACE_SIZE }, - { 0x00000003, mmBIF_FB_EN }, - //{ 0x00000000, mmMC_VM_FB_OFFSET }, - { 0x00000000, mmSRBM_CNTL }, - { 0x00020000, mmSRBM_CNTL }, - { 0x80000000, mmATC_VMID0_PASID_MAPPING }, - { 0x00000000, mmATC_VMID_PASID_MAPPING_UPDATE_STATUS }, - { 0x00000000, mmRLC_CNTL }, - { 0x00000000, mmRLC_CNTL }, - { 0x00000000, mmRLC_CNTL }, - { 0xe0000000, mmGRBM_GFX_INDEX }, - { 0x00000000, mmCGTS_TCC_DISABLE }, - { 0x00000000, mmTCP_ADDR_CONFIG }, - { 0x000000ff, mmTCP_ADDR_CONFIG }, - { 0x76543210, mmTCP_CHAN_STEER_LO }, - { 0xfedcba98, mmTCP_CHAN_STEER_HI }, - { 0x00000000, mmDB_DEBUG2 }, - { 0x00000000, mmDB_DEBUG }, - { 0x00002b16, mmCP_QUEUE_THRESHOLDS }, - { 0x00006030, mmCP_MEQ_THRESHOLDS }, - { 0x01000104, mmSPI_CONFIG_CNTL_1 }, - { 0x98184020, mmPA_SC_FIFO_SIZE }, - { 0x00000001, mmVGT_NUM_INSTANCES }, - { 0x00000000, mmCP_PERFMON_CNTL }, - { 0x01180000, mmSQ_CONFIG }, - { 0x00000000, mmVGT_CACHE_INVALIDATION }, - { 0x00000000, mmSQ_THREAD_TRACE_BASE }, - { 0x0000df80, mmSQ_THREAD_TRACE_MASK }, - { 0x02249249, mmSQ_THREAD_TRACE_MODE }, - { 0x00000000, mmPA_SC_LINE_STIPPLE_STATE }, - { 0x00000000, mmCB_PERFCOUNTER0_SELECT1 }, - { 0x06000100, mmCGTT_VGT_CLK_CTRL }, - { 0x00000007, mmPA_CL_ENHANCE }, - { 0x00000001, mmPA_SC_ENHANCE }, - { 0x00ffffff, mmPA_SC_FORCE_EOV_MAX_CNTS }, - { 0x00000000, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000010, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000020, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000030, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000040, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000050, mmSRBM_GFX_CNTL }, - { 0x00000320, 
mmSH_MEM_CONFIG }, - { 0x00000060, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000070, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000080, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000090, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x000000a0, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x000000b0, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x000000c0, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x000000d0, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x000000e0, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x000000f0, mmSRBM_GFX_CNTL }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000000, mmSRBM_GFX_CNTL }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmRLC_PG_CNTL }, - { 0x00000000, mmGRBM_STATUS2 }, - { 0x15000000, mmCP_ME_CNTL }, - { 0x50000000, mmCP_MEC_CNTL }, - { 0x00000000, mmSRBM_GFX_CNTL }, - { 0x0000000e, mmSH_MEM_APE1_BASE }, - { 0x0000020d, mmSH_MEM_APE1_LIMIT }, - { 0x00000000, mmSRBM_GFX_CNTL }, - { 0x00000000, mmSRBM_GFX_CNTL }, - { 0x00000000, mmSH_MEM_CONFIG }, - { 0x00000320, mmSH_MEM_CONFIG }, - { 0x00000000, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_RB_VMID }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmRLC_CNTL }, - { 0x00000000, mmRLC_CNTL }, - { 0x00000000, mmRLC_SRM_CNTL }, - { 0x00000002, mmRLC_SRM_CNTL }, - { 0x00000000, mmCP_ME_CNTL }, - { 0x15000000, mmCP_ME_CNTL }, - { 0x00000000, mmCP_MEC_CNTL }, - { 0x50000000, mmCP_MEC_CNTL }, - { 0x80000004, mmCP_DFY_CNTL }, - { 0x0840800a, mmCP_RB0_CNTL }, - { 0xf30fff0f, mmTCC_CTRL }, - { 0x00000002, mmTCC_EXE_DISABLE }, - { 0x000000ff, mmTCP_ADDR_CONFIG }, - { 0x540ff000, mmCP_CPC_IC_BASE_LO }, - { 0x000000b4, mmCP_CPC_IC_BASE_HI }, - { 0x00010000, mmCP_HYP_MEC1_UCODE_ADDR }, - { 0x00041b75, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000710e8, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000910dd, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000a1081, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000b016f, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000c0e3c, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000d10ec, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000e0188, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00101b5d, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00150a6c, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00170c5e, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x001d0c8c, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x001e0cfe, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00221408, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00370d7b, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00390dcb, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x003c142f, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x003f0b27, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00400e63, 
mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00500f62, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00460fa7, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00490fa7, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x005811d4, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00680ad6, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00760b00, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00780b0c, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00790af7, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x007d1aba, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x007e1abe, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00591260, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x005a12fb, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00861ac7, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x008c1b01, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x008d1b34, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00a014b9, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00a1152e, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00a216fb, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00a41890, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00a31906, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00a50b14, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00621387, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x005c0b27, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00160a75, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC1_UCODE_DATA }, - { 0x00010000, mmCP_HYP_MEC2_UCODE_ADDR }, - { 0x00041b75, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000710e8, mmCP_HYP_MEC2_UCODE_DATA }, - { 
0x000910dd, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000a1081, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000b016f, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000c0e3c, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000d10ec, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000e0188, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00101b5d, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00150a6c, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00170c5e, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x001d0c8c, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x001e0cfe, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00221408, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00370d7b, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00390dcb, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x003c142f, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x003f0b27, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00400e63, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00500f62, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00460fa7, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00490fa7, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x005811d4, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00680ad6, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00760b00, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00780b0c, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00790af7, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x007d1aba, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x007e1abe, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00591260, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x005a12fb, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00861ac7, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x008c1b01, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x008d1b34, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00a014b9, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00a1152e, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00a216fb, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00a41890, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00a31906, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00a50b14, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00621387, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x005c0b27, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00160a75, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, 
mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x000f016a, mmCP_HYP_MEC2_UCODE_DATA }, - { 0x00000000, 0xFFFFFFFF }, -}; - -static const PWR_DFY_Section pwr_virus_section1 = { - .dfy_cntl = 0x80000004, - .dfy_addr_hi = 0x000000b4, - .dfy_addr_lo = 0x540fe800, - .dfy_data = { - 0x7e000200, 0x7e020201, 0x7e040204, 0x7e060205, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 
0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0x0a080102, 0x0a0a0701, 0x0a080102, 0x0a0a0701, - 0x0a080500, 0x0a0a0303, 0x0a080500, 0x0a0a0303, 0xbf810000, 0x00000000, 0x00000000, 0x00000000, - 0x00000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x54106f00, 0x000400b4, 0x00004000, 0x00804fac, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - }, - .dfy_size = 416 -}; - -static const PWR_DFY_Section pwr_virus_section2 = { - .dfy_cntl = 0x80000004, - .dfy_addr_hi = 0x000000b4, - .dfy_addr_lo = 0x540fef00, - .dfy_data = { - 0xc0031502, 0x00001e00, 0x00000001, 0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - }, - .dfy_size = 16 -}; - -static const PWR_DFY_Section pwr_virus_section3 = { - .dfy_cntl = 0x80000004, - .dfy_addr_hi = 0x000000b4, - .dfy_addr_lo = 0x540ff000, - .dfy_data = { - 0xc424000b, 0x80000145, 0x94800001, 0x94c00001, 0x95000001, 0x95400001, 0x95800001, 0xdc810000, - 0xdcc10000, 0xdd010000, 0xdd410000, 0xdd810000, 0xc4080061, 0xd8400013, 0xd8000003, 0xc40c0001, - 0x24ccffff, 0x3cd08000, 0x9500fffd, 0x1cd0ffcf, 0x7d018001, 0xc4140004, 0x050c0019, 0xd8400008, - 0x84c00000, 0x80000023, 0x80000067, 0x8000006a, 0x8000006d, 0x80000079, 0x80000084, 0x8000008f, - 0x80000099, 0x800000a0, 0x800000af, 0xd8400053, 0xc4080007, 0x388c0001, 0x08880002, 0x04100003, - 0x94c00005, 0x98800003, 0x04100004, 0x8000002d, 0x04100005, 0x8c00003f, 0x8c000043, 0x28cc0000, - 0xccc00050, 0x8c000055, 0x28080001, 0xcc000004, 0x7d808001, 
0xd8400013, 0xd88130b8, 0xcd400008, - 0xdc180000, 0xdc140000, 0xdc100000, 0xdc0c0000, 0xcc800005, 0xdc080000, 0x80000168, 0xc40c000e, - 0x28cc0008, 0xccc00013, 0x90000000, 0xcd013278, 0xc4113278, 0x95000001, 0x24cc0700, 0xd8400029, - 0xc4113255, 0xcd01324f, 0xc4113254, 0x1d10ffdf, 0xcd013254, 0x10cc0014, 0x1d10c017, 0x7d0d000a, - 0xd8400013, 0xd8400008, 0xcd0130b7, 0x14cc0010, 0x90000000, 0xd9c00036, 0x8000005d, 0xd8400013, - 0xc00c4000, 0xccc130b5, 0xc40c000e, 0x28cc0008, 0xccc00013, 0xc40c0021, 0x14d00011, 0x9500fffe, - 0xdc030000, 0xd800000c, 0xd800000d, 0xc40c005e, 0x94c01b10, 0xd8400013, 0x90000000, 0xc00e0080, - 0xccc130b5, 0x8000013b, 0xc00e0800, 0xccc130b5, 0x8000013b, 0xd8400053, 0x04100006, 0x8c00003f, - 0x8c000043, 0x28cc0000, 0xccc00050, 0x8c000055, 0x280c0008, 0xccc00052, 0xd8000021, 0x28180039, - 0x80000034, 0xd8400053, 0x04100007, 0x8c00003f, 0x8c000043, 0x28cc0001, 0xccc00050, 0x8c000055, - 0x280c0010, 0xccc00052, 0x28180039, 0x80000034, 0xd8400053, 0x04100008, 0x8c00003f, 0x8c000043, - 0x28cc0003, 0xccc00050, 0x8c000055, 0x280c0020, 0xccc00052, 0x28180039, 0x80000034, 0xdc030000, - 0xd8000069, 0x28080001, 0xc428000d, 0x7ca88004, 0xcc800079, 0x04280001, 0xcc00006f, 0x8000013b, - 0x80000034, 0x04100010, 0x8c00003f, 0x8c000043, 0xccc00078, 0x8c000055, 0x28180080, 0x80000034, - 0x04100001, 0xc40c000e, 0x28cc0008, 0xccc00013, 0xcd013278, 0xc4113278, 0x95000001, 0xc00c4000, - 0xc4113254, 0x1d10c017, 0xd8400013, 0xd8400008, 0xccc130b5, 0xcd0130b7, 0x8000013b, 0x95c00001, - 0x96000001, 0x96400001, 0x96800001, 0x96c00001, 0x97000001, 0x97400001, 0x97800001, 0x97c00001, - 0xdc810000, 0xc40c000c, 0xcd4c0380, 0xcdcc0388, 0x55dc0020, 0xcdcc038c, 0xce0c0390, 0x56200020, - 0xce0c0394, 0xce4c0398, 0x56640020, 0xce4c039c, 0xce8c03a0, 0x56a80020, 0xce8c03a4, 0xcecc03a8, - 0x56ec0020, 0xcecc03ac, 0xcf0c03b0, 0x57300020, 0xcf0c03b4, 0xcf4c03b8, 0x57740020, 0xcf4c03bc, - 0xcf8c03c0, 0x57b80020, 0xcf8c03c4, 0xcfcc03c8, 0x57fc0020, 0xcfcc03cc, 0xd9000033, 0xc41c0009, - 0x25dc0010, 0x95c0fffe, 0xd8400013, 0xc41c000c, 0x05dc002f, 0xcdc12009, 0xc41d200a, 0xd8400013, - 0xcc012009, 0xd9000034, 0x25e01c00, 0x12200013, 0x25e40300, 0x12640008, 0x25e800c0, 0x12a80002, - 0x25ec003f, 0x7e25c00a, 0x7eae400a, 0x7de5c00a, 0xddc10000, 0xc02ee000, 0xcec1c200, 0xc40c005f, - 0xccc00037, 0x24d000ff, 0x31100006, 0x9500007b, 0x8c000190, 0xdc1c0000, 0xd8400013, 0xcdc1c200, - 0xc40c000c, 0xc4df0388, 0xc4d7038c, 0x51540020, 0x7d5dc01a, 0xc4e30390, 0xc4d70394, 0x51540020, - 0x7d62001a, 0xc4e70398, 0xc4d7039c, 0x51540020, 0x7d66401a, 0xc4eb03a0, 0xc4d703a4, 0x51540020, - 0x7d6a801a, 0xc4ef03a8, 0xc4d703ac, 0x51540020, 0x7d6ec01a, 0xc4f303b0, 0xc4d703b4, 0x51540020, - 0x7d73001a, 0xc4f703b8, 0xc4d703bc, 0x51540020, 0x7d77401a, 0xc4fb03c0, 0xc4d703c4, 0x51540020, - 0x7d7b801a, 0xc4ff03c8, 0xc4d703cc, 0x51540020, 0x7d7fc01a, 0xdc080000, 0xcc800013, 0xc4d70380, - 0xc4080001, 0x1c88001c, 0xcd400008, 0xc40c0083, 0x94c00010, 0xdc0e0000, 0x94c0000e, 0xc40c0082, - 0x24d00001, 0x9900000b, 0x18cc01e3, 0x3cd00004, 0x95000008, 0xc40c0085, 0x18cc006a, 0x98c00005, - 0xc40c0082, 0x18cc01e3, 0x3cd00004, 0x9900fffa, 0xdc180000, 0xdc140000, 0xdc100000, 0xdc0c0000, - 0xcc800004, 0xdc080000, 0x90000000, 0xc4080001, 0x1c88001c, 0xcd400008, 0xdc180000, 0xdc140000, - 0xdc100000, 0xdc0c0000, 0xcc800004, 0xdc080000, 0x90000000, 0xd8400051, 0xc428000c, 0x04180018, - 0x32640002, 0x9a80001f, 0x9a40001e, 0xcd800013, 0xc4293265, 0x040c0000, 0x1aac0027, 0x2aa80080, - 0xce813265, 0x9ac00017, 0xd80002f1, 0x04080002, 0x08880001, 0xd8080250, 0xd8080258, 
0xd8080230, - 0xd8080238, 0xd8080240, 0xd8080248, 0xd8080268, 0xd8080270, 0xd8080278, 0xd8080280, 0xd8080228, - 0xd8000367, 0x9880fff3, 0x04080010, 0x08880001, 0xd80c0309, 0xd80c0319, 0x04cc0001, 0x9880fffc, - 0x7c408001, 0x88000000, 0xc00e0100, 0xd8400013, 0xd8400008, 0xccc130b5, 0x8000016e, 0xc4180032, - 0x29980008, 0xcd800013, 0x95800001, 0x7c40c001, 0x18d0003f, 0x24d4001f, 0x24d80001, 0x155c0001, - 0x05e80180, 0x9900000b, 0x202c003d, 0xcd800010, 0xcec1325b, 0xc42d325b, 0x96c00001, 0x86800000, - 0x80000168, 0x80000aa7, 0x80000bfc, 0x800012e9, 0xc4200007, 0x0a200001, 0xce000010, 0x80001b70, - 0x7c40c001, 0x8c000190, 0xc410001b, 0xd8000032, 0xd8000031, 0x9900091a, 0x7c408001, 0x88000000, - 0x24d000ff, 0x05280196, 0x18d4fe04, 0x29540008, 0xcd400013, 0x86800000, 0x800001b4, 0x8000032b, - 0x80000350, 0x80000352, 0x8000035f, 0x80000701, 0x8000047c, 0x8000019f, 0x80000800, 0xc419325b, - 0x1d98001f, 0xcd81325b, 0x8c00003f, 0xc4140004, 0xd8400008, 0x04100002, 0x8c000043, 0x28cc0002, - 0xccc00050, 0xc43c0044, 0x27fc0003, 0x9bc00002, 0x97c00006, 0xc00c4000, 0xccc130b5, 0x8c000055, - 0xd8400013, 0xd88130b8, 0xcd400008, 0x90000000, 0xd8400008, 0xcd400013, 0x7d40c001, 0xd8400028, - 0xd8400029, 0xd9400036, 0xc4193256, 0xc41d3254, 0x15540008, 0xcd400009, 0xcd40005b, 0xcd40005e, - 0xcd40005d, 0xd840006d, 0xc421325a, 0xc42d3249, 0x11540015, 0x19a4003c, 0x1998003f, 0x1af0007d, - 0x11dc000b, 0x1264001f, 0x15dc000d, 0x7d65400a, 0x13300018, 0x1a38003f, 0x7dd5c00a, 0x7df1c00a, - 0xcd800045, 0xcdc00100, 0xc411326a, 0xc415326b, 0xc419326c, 0xc41d326d, 0xc425326e, 0xc4293279, - 0xce800077, 0xcd000056, 0xcd400057, 0xcd800058, 0xcdc00059, 0xc4193265, 0x259c8000, 0x99c00004, - 0xce40005a, 0x29988000, 0xcd813265, 0xc4113248, 0x2510000f, 0xcd000073, 0xc418000d, 0xc411326f, - 0x17300019, 0x97000009, 0x25140fff, 0x95400007, 0xd800003a, 0x8c001b6d, 0xc4153279, 0xcd400077, - 0xcd00005f, 0xd8000075, 0x26f00001, 0x15100010, 0x7d190004, 0xcd000035, 0x97000035, 0x1af07fe8, - 0xd8800013, 0xd8400010, 0xd8400008, 0xcf00000d, 0xcf00000a, 0x8c001427, 0x04340022, 0x07740001, - 0x04300010, 0xdf430000, 0x7c434001, 0x7c408001, 0xd4412e01, 0x0434001e, 0xdf430000, 0xd4400078, - 0xdf030000, 0xd4412e40, 0xd8400013, 0xcc41c030, 0xcc41c031, 0xc43dc031, 0xccc00013, 0x04343000, - 0xc4113246, 0xc41d3245, 0xcf413267, 0x51100020, 0x7dd1c01a, 0xc4353267, 0x45dc0160, 0xc810001f, - 0x1b4c0057, 0x1b700213, 0x1b740199, 0x7f4f400a, 0x7f73400a, 0x55180020, 0x2198003f, 0xd1c00025, - 0xcf400024, 0xcd000026, 0xcd800026, 0xd8400027, 0x9bc00001, 0x248dfffe, 0xd8800013, 0xccc12e00, - 0x7c434001, 0x7c434001, 0x8c00142b, 0xc43c000e, 0x1af4007d, 0x2bfc0008, 0x33740003, 0x26d80001, - 0xcfc00013, 0x1ae8003e, 0x9680000c, 0xc4253277, 0x26680001, 0x96800009, 0x2a640002, 0xce413277, - 0xd8400013, 0xc4253348, 0xce413348, 0xc4253348, 0x96400001, 0xcfc00013, 0x9b400003, 0x958000d8, - 0x80000315, 0xc4253277, 0x04303000, 0x26680001, 0xcf013267, 0xc4193246, 0xc41d3245, 0xc4313267, - 0x96800041, 0x51980020, 0x1b342010, 0x7d9d801a, 0x1714000c, 0x25540800, 0x1b30c012, 0x459801b0, - 0x7d77400a, 0x7f37000a, 0x2b300000, 0xcf00001c, 0xd180001e, 0xd8400021, 0x04240010, 0x199c01e2, - 0x7e5e4002, 0x3e5c0004, 0x3e540002, 0xc428000f, 0x9a80ffff, 0x95c00006, 0xc80c0011, 0xc8140011, - 0x54d00020, 0x55580020, 0x80000282, 0x95400015, 0xc80c0011, 0x0a640002, 0x041c0001, 0x45980008, - 0x54d00020, 0x96400004, 0xc8140011, 0x45980004, 0x041c0000, 0xcf00001c, 0xd180001e, 0xd8400021, - 0xc428000f, 0x9a80ffff, 0x99c00003, 0xc8180011, 0x80000282, 0xc8140011, 0x55580020, 0x80000282, - 0x45980004, 
0xc80c0011, 0xcf00001c, 0xd180001e, 0xd8400021, 0xc428000f, 0x9a80ffff, 0xc8100011, - 0xc8140011, 0x55580020, 0xd8400013, 0xccc1334e, 0xcd01334f, 0xcd413350, 0xcd813351, 0xd881334d, - 0xcfc00013, 0xc4193273, 0xc41d3275, 0xc40d3271, 0xc4113270, 0xc4153274, 0x50cc0020, 0x7cd0c01a, - 0x7cdcc011, 0x05900008, 0xcd00006a, 0xcdc0006b, 0xc41d3272, 0x7d594002, 0x54d00020, 0xd8800013, - 0xccc12e23, 0xcd012e24, 0xcdc12e25, 0xcfc00013, 0xc4193246, 0xc41d3245, 0xc4313267, 0x15540002, - 0x51980020, 0x7d9d801a, 0xc81c001f, 0x1b340057, 0x1b280213, 0x1b300199, 0x45980198, 0x7f37000a, - 0x7f2b000a, 0x55e40020, 0xcf000024, 0xd1800025, 0xcdc00026, 0xce400026, 0xd8400027, 0xcd40000d, - 0xcd40000a, 0xc40d3249, 0x20cc003c, 0xccc13249, 0xc4113274, 0xdd430000, 0xc01e0001, 0x29dc0002, - 0x04280000, 0xd8000036, 0xcc400078, 0xcc400078, 0x2d540002, 0x95400022, 0x078c0000, 0x07d40000, - 0x8c00120d, 0x8c001239, 0x8c001232, 0x04f80000, 0x057c0000, 0xcdc00013, 0xc414000d, 0xc41c0019, - 0x7dd5c005, 0x25dc0001, 0xd840007c, 0xd8400074, 0xd8400069, 0xc40c005e, 0x94c018a6, 0xd4412e22, - 0xd800007c, 0xc40c005e, 0x94c018a2, 0x95c00007, 0xc40c0019, 0x7cd4c005, 0x24cc0001, 0x94c00008, - 0x9680fffc, 0x800002e3, 0xc40c0057, 0x7cd0c002, 0x94c00003, 0x9680fffd, 0x800002e3, 0xd8000069, - 0xcfc00013, 0xcd013273, 0xcd013275, 0xd8000074, 0xc414005e, 0x9540188f, 0xcfc00013, 0xc40d3249, - 0xc013cfff, 0x7cd0c009, 0xccc13249, 0x9680000b, 0xc40c0077, 0x38d00001, 0x99000006, 0x04cc0002, - 0xdcc30000, 0xc40c005e, 0x94c01882, 0xd4400078, 0xd800000d, 0x80000304, 0x7c41c001, 0x7c41c001, - 0xd840002f, 0xc41c0015, 0x95c0ffff, 0xd8400030, 0xc41c0016, 0x95c0ffff, 0xd8000030, 0xc41c0016, - 0x99c0ffff, 0xd800002f, 0xc41c0015, 0x99c0ffff, 0xc81c001f, 0x49980198, 0x55e40020, 0x459801a0, - 0xcf000024, 0xd1800025, 0xcdc00026, 0xce400026, 0xd8400027, 0x04302000, 0xcfc00013, 0xcf013267, - 0xc4313267, 0x96800004, 0x97000001, 0xd8000036, 0x80000329, 0xd8800013, 0xcc812e00, 0x04302000, - 0xcfc00013, 0xcf013267, 0xc4313267, 0x97000001, 0xc4193256, 0xc42d3249, 0x16ec001f, 0xd8000028, - 0xd800002b, 0x1998003e, 0xcec00031, 0xd8000036, 0xd8000010, 0x97800004, 0xd8400010, 0xce00000a, - 0x1a18003e, 0xcd800008, 0x90000000, 0xc4380004, 0xd8400008, 0xd8400013, 0xd88130b8, 0x04100000, - 0x7d43c001, 0xcd400013, 0xc4093249, 0x1888003e, 0x94800015, 0xd8400074, 0x8c000671, 0xcd400013, - 0x9a400006, 0xc419324c, 0x259c0001, 0x1598001f, 0x95c0000d, 0x9580000c, 0x99000003, 0xd8400036, - 0x04100001, 0xc40c0021, 0x14d80011, 0x24dc00ff, 0x31e00002, 0x31dc0003, 0x9580fff0, 0x9a000003, - 0x99c00002, 0xd9c00036, 0x94800004, 0xd8000074, 0xc418005e, 0x95801827, 0xcf800008, 0x90000000, - 0xd8800036, 0x90000000, 0xd8c00036, 0xc424000b, 0x32640002, 0x9a400004, 0xc4180014, 0x9580ffff, - 0xd840002f, 0xc40c0021, 0x14dc0011, 0x95c0fffe, 0xccc00037, 0x8c000190, 0x90000000, 0xd8400008, - 0xd800006d, 0xc41d3246, 0xc4193245, 0x51dc0020, 0x7d9d801a, 0xd8400028, 0xd8400029, 0xc420000b, - 0x32200002, 0x9a0000ad, 0x04200032, 0xd9000010, 0xde030000, 0xd8400033, 0x04080000, 0xc43c0009, - 0x27fc0002, 0x97c0fffe, 0xc42c0015, 0x96c0ffff, 0xd800002e, 0xc42d3249, 0x1af4003e, 0x9740004d, - 0xc428000d, 0xc4080060, 0x7ca88005, 0x24880001, 0x7f4b4009, 0x97400046, 0xc4313274, 0xc4100057, - 0x7d33400c, 0x97400009, 0x28240100, 0x7e6a4004, 0xce400079, 0x1eecffdd, 0xcec13249, 0xcf013273, - 0xcf013275, 0x800003c3, 0xc429326f, 0x1aa80030, 0x96800006, 0x28240001, 0xc428000d, 0x06a80008, - 0x7e6a8004, 0xce800035, 0xc41d3272, 0x25cc0001, 0x10cc0004, 0x19e80042, 0x25dc0006, 0x11dc0001, - 0x7e8e800a, 0x7de9c00a, 0xc40d3271, 
0xc4293270, 0x50cc0020, 0x7ce8c01a, 0x7cd30011, 0x11e80007, - 0x2aa80000, 0xce80001c, 0xd300001e, 0xd8400021, 0xc428000f, 0x9a80ffff, 0xc4300011, 0x1b30003f, - 0x33300000, 0xc4240059, 0x1660001f, 0x7e320009, 0xc0328000, 0x7e72400a, 0x0430000c, 0x9a000002, - 0x04300008, 0xc02ac000, 0x7d310002, 0x17300002, 0x2aa87600, 0x7cd0c011, 0xcdc00024, 0xd0c00025, - 0xce800026, 0x04280222, 0xce800026, 0x96000002, 0xce400026, 0xd8400027, 0xc4280058, 0x22ec003d, - 0xcec13249, 0xcd013273, 0xce813275, 0xd800007b, 0xc8380018, 0x57b00020, 0x04343108, 0xc429325d, - 0x040c3000, 0x13740008, 0x2374007e, 0x32a80003, 0xccc13267, 0xc40d3267, 0x18ec0057, 0x18e40213, - 0x18cc0199, 0x7cecc00a, 0x7ce4c00a, 0x94800003, 0xd4400078, 0x800003e7, 0x04200022, 0xde030000, - 0xccc00024, 0xd1800025, 0xcf400026, 0xd4400026, 0xd8400027, 0x04200010, 0xde030000, 0xccc00024, - 0x45980104, 0xd1800025, 0xd4400026, 0xcf800026, 0xcf000026, 0xd8400027, 0x49980104, 0x9a80000a, - 0xc81c001f, 0x45980168, 0x55e00020, 0xccc00024, 0xd1800025, 0xcdc00026, 0xce000026, 0xd8400027, - 0x800003f2, 0x8c000448, 0xcd400013, 0x040c2000, 0xccc13267, 0xc40d3267, 0x94c00001, 0xc40d3249, - 0x18cc003e, 0xd8400030, 0xc42c0016, 0x96c0ffff, 0xd8000030, 0xc42c0016, 0x9ac0ffff, 0xd800002f, - 0xc42c0015, 0x9ac0ffff, 0xd8400034, 0xc4300025, 0xc4340024, 0xc4380081, 0xcf813279, 0xcf41326e, - 0xcf01326d, 0x94c0000d, 0x254c0700, 0xc424001e, 0x10cc0010, 0x1a641fe8, 0x28cc0726, 0x2a640200, - 0xd8400013, 0xccc1237b, 0x2264003f, 0xcd400013, 0xd8813260, 0xce41325b, 0xc4240033, 0xc4280034, - 0xd9000036, 0xd8000010, 0x8c001427, 0x96400006, 0xde430000, 0xce40000c, 0xc40c005e, 0x94c01755, - 0xd4400078, 0x9680000a, 0xce80000a, 0x06a80002, 0xd8400010, 0xde830000, 0xce80000d, 0xc40c005e, - 0x94c0174c, 0xd4400078, 0xd8000010, 0x8c00142b, 0xc4393265, 0x2bb80040, 0xd8400032, 0xcf813265, - 0xc4200012, 0x9a00ffff, 0xc4100044, 0x19180024, 0xc8100072, 0x551c003f, 0x99c00003, 0x95800010, - 0x8000043d, 0xc00c8000, 0xd840006c, 0x28200000, 0x8000043f, 0xc00c4000, 0x282000f0, 0xcd400013, - 0xd8400008, 0xc4113255, 0xcd01324f, 0xd8400013, 0xd88130b8, 0xccc130b5, 0xce000053, 0x90000000, - 0x195c00e8, 0xc4100004, 0x2555fff0, 0xc0360001, 0x042c0000, 0x29540001, 0xd8400008, 0x04240000, - 0x04280004, 0xc420000b, 0x32200002, 0x9a000009, 0xcd400013, 0xcec1c200, 0xc5e124dc, 0x0aa80001, - 0x7ef6c001, 0x7e624001, 0x96000001, 0x9a80fff9, 0xc02ee000, 0xcd400013, 0x2555fff0, 0xcec1c200, - 0x29540008, 0xc81c001f, 0xcd400013, 0x55e00020, 0xc42d3255, 0xc4353259, 0xd8013260, 0x45980158, - 0xccc00024, 0xd1800025, 0xcdc00026, 0xce000026, 0xd8400027, 0x49980158, 0x45980170, 0xc4200012, - 0x16200010, 0x9a00fffe, 0xccc00024, 0xd1800025, 0xc429324f, 0xce400026, 0xce800026, 0xcec00026, - 0xcf400026, 0xd8400027, 0xcd000008, 0x90000000, 0xc40d325b, 0x7d43c001, 0x195400e8, 0x1154000a, - 0x18dc00e8, 0x05e80488, 0x18d0006c, 0x18f807f0, 0x18e40077, 0x18ec0199, 0x7e6e400a, 0x86800000, - 0x8000048e, 0x80000494, 0x800004de, 0x80000685, 0x80000686, 0x800006ac, 0x1ccc001f, 0xccc1325b, - 0xc411325d, 0x251001ef, 0xcd01325d, 0x90000000, 0xc4293254, 0x1264000a, 0xc4300004, 0x7d79400a, - 0x7e7a400a, 0x52a8001e, 0x15180001, 0x7d69401a, 0x202c007d, 0xcec1325b, 0x95000008, 0x95800028, - 0xc42d3267, 0xc4193246, 0xc41d3245, 0x1aec0028, 0xc40d325c, 0x800004cc, 0xc42d3256, 0xc419324e, - 0x26e8003f, 0x1aec003e, 0x12f4000e, 0xc41d324d, 0xc40d324f, 0x7d75401a, 0x04100002, 0x7d290004, - 0x7f8f4001, 0x7f52800f, 0x51980020, 0x7d9d801a, 0x50e00002, 0x51980008, 0x9a800002, 0x800004d1, - 0x7d0dc002, 0x6665fc00, 0x7e5e401a, 0xcec00008, 0x7da1c011, 
0xd140000b, 0xd1c00002, 0x2a644000, - 0xce400002, 0x7f534002, 0x6665fc00, 0x7e76401a, 0xd1800002, 0xce400002, 0x800004d7, 0xc42d325a, - 0xc4193258, 0x1aec003e, 0xc41d3257, 0xc4213259, 0x12f4000e, 0x7d75401a, 0x51980020, 0x52200002, - 0x7d9d801a, 0xcec00008, 0x7da1c011, 0xd140000b, 0xd1c00002, 0x2a644000, 0xce400002, 0x202c003d, - 0xcf000008, 0xcfc00013, 0xcec1325b, 0xc42d325b, 0x96c00001, 0x90000000, 0xc4193260, 0x259c0007, - 0x15980004, 0x05e804e3, 0x86800000, 0x800004e7, 0x800004f0, 0x80000505, 0x8000016a, 0xc4380004, - 0xcfc00013, 0xd8400008, 0xc435325d, 0xd801325b, 0x277401ef, 0xcf41325d, 0xcf800008, 0x90000000, - 0xc4380004, 0xd8400008, 0x8c000671, 0x9640fff4, 0x17e00008, 0xc418000d, 0xce000009, 0xd84131db, - 0xcf800008, 0xcd800009, 0xc430001e, 0xcfc00013, 0xc42d325b, 0x1b301ff8, 0x2b300400, 0x2330003f, - 0x26edf000, 0x7ef2c00a, 0xd8413260, 0xcec1325b, 0x90000000, 0x05a80507, 0x86800000, 0x8000050c, - 0x80000528, 0x8000057d, 0x800005c2, 0x800005f3, 0xc4380004, 0xd8400008, 0x8c000671, 0xcfc00013, - 0x9a400012, 0x1bd400e8, 0xc42c004a, 0xcd40005e, 0xc41c004d, 0xcec0005e, 0x99c0000c, 0xc4100019, - 0x7d150005, 0x25100001, 0x99000008, 0x8c00063b, 0xcfc00013, 0xc4113277, 0x2511fffd, 0xcd013277, - 0xd801326f, 0x80000624, 0x04240012, 0x1be00fe4, 0xce413260, 0xce000066, 0xcf800008, 0x90000000, - 0xd8400068, 0xc4380004, 0xd8400008, 0x8c000671, 0xcfc00013, 0x9a400013, 0x1bd400e8, 0xc42c004a, - 0xcd40005e, 0xc41c004d, 0xcec0005e, 0x99c0000d, 0xc4100019, 0x7d150005, 0x25100001, 0x99000009, - 0xd8400067, 0x8c00063b, 0xcfc00013, 0xc4113277, 0x2511fffd, 0xcd013277, 0xd801326f, 0x80000624, - 0x1bd400e8, 0xc42c0060, 0x7ed6c005, 0x26ec0001, 0xc4113271, 0xc4153270, 0xc4193272, 0xc41d3273, - 0x04280022, 0x51100020, 0x7d51401a, 0xc4113274, 0xc4213275, 0xc4253276, 0xc4313248, 0xd1400061, - 0x2730000f, 0x13300010, 0x7db1800a, 0xcd800060, 0x96c00002, 0x05dc0008, 0xcdc00062, 0x042c3000, - 0xcd000063, 0xce000064, 0xce400065, 0xcec13267, 0xc42d3246, 0xc4313245, 0xc4353267, 0xce813260, - 0x52ec0020, 0x7ef2c01a, 0xc820001f, 0x1b700057, 0x1b680213, 0x1b740199, 0x46ec0188, 0x7f73400a, - 0x7f6b400a, 0x56240020, 0xcf400024, 0xd2c00025, 0xce000026, 0xce400026, 0x042c2000, 0xd8400027, - 0xc418000d, 0x17e00008, 0xce000009, 0xcec13267, 0xc42d3267, 0x26e01000, 0x9a00fffe, 0xd8400013, - 0xd9c131fc, 0xcd800009, 0xcf800008, 0x96c00001, 0x90000000, 0xc4380004, 0xd8400008, 0xc4113277, - 0xc41c000b, 0xc420000c, 0x11dc0002, 0x7de1c001, 0x11dc0008, 0x29dc0001, 0x25140001, 0x191807e4, - 0x192007ec, 0x95400004, 0xd8400013, 0xcdc1334a, 0xcfc00013, 0x9580000e, 0x09980001, 0x041c0001, - 0x95800005, 0x09980001, 0x51dc0001, 0x69dc0001, 0x9980fffd, 0x7de20014, 0x561c0020, 0xd8400013, - 0xce013344, 0xcdc13345, 0xcfc00013, 0x95400022, 0x042c3000, 0xcec13267, 0xc42d3246, 0xc4313245, - 0xc4353267, 0xd8400013, 0xc425334d, 0x26640001, 0x9640fffe, 0xc419334e, 0xc41d334f, 0xc4213350, - 0xc4253351, 0x52ec0020, 0x1b680057, 0x7ef2c01a, 0x1b700213, 0x1b740199, 0x46ec01b0, 0x7f6b400a, - 0x7f73400a, 0xcfc00013, 0xcf400024, 0xd2c00025, 0xcd800026, 0xcdc00026, 0xce000026, 0xce400026, - 0x042c2000, 0xd8400027, 0xcec13267, 0xc42d3267, 0x96c00001, 0x04280032, 0xce813260, 0xd8800068, - 0xcf800008, 0x90000000, 0xc4380004, 0xd8400008, 0x2010007d, 0xcd01325b, 0xc411325b, 0x1910003e, - 0x9500fffe, 0x04100040, 0xcd00001b, 0xd8400021, 0xc410000f, 0x9900ffff, 0x04100060, 0xcd00001b, - 0xd8400021, 0xc410000f, 0x9900ffff, 0xcfc00013, 0x2010003d, 0xcd01325b, 0xc4113277, 0x25140001, - 0x191807e4, 0x9540000b, 0x2511fffd, 0xcd013277, 0xc41c000b, 0xc420000c, 0x11dc0002, 
0x7de1c001, - 0x11dc0008, 0xd8400013, 0xcdc1334a, 0xcfc00013, 0x95800005, 0xd8400013, 0xd8013344, 0xd8013345, - 0xcfc00013, 0xc4180050, 0xc41c0052, 0x04280042, 0xcd813273, 0xcdc13275, 0xce813260, 0xd9000068, - 0xd8400067, 0xcf800008, 0x90000000, 0x07d40000, 0x8c00120d, 0x8c00124f, 0x8c001232, 0x057c0000, - 0x042c3000, 0xc4380004, 0xcfc00013, 0xd8400008, 0xcec13267, 0xc42d3246, 0xc4313245, 0xc4353267, - 0x52ec0020, 0x7ef2c01a, 0x1b680057, 0x1b700213, 0x1b740199, 0xc820001f, 0x46ec0190, 0x7f6b400a, - 0x7f73400a, 0x56240020, 0xcf400024, 0xd2c00025, 0xce000026, 0xce400026, 0x042c2000, 0xd8400027, - 0xcfc00013, 0xcec13267, 0xc4153249, 0x2154003d, 0xc41c0019, 0x1bd800e8, 0x7dd9c005, 0x25dc0001, - 0xc42c004a, 0xcd80005e, 0xc420004d, 0xcec0005e, 0x11dc0010, 0x7e1e000a, 0xcd413249, 0xce01326f, - 0x28340001, 0x05980008, 0x7f598004, 0xcd800035, 0x1be800e8, 0xc42c004a, 0xce80005e, 0xd801327a, - 0xd800005f, 0xd8000075, 0xd800007f, 0xc424004c, 0xce41326e, 0xcec0005e, 0x28240100, 0x7e6a4004, - 0xce400079, 0xc435325d, 0x277401ef, 0x04240020, 0xce41325e, 0xd801325b, 0xd8013260, 0xcf41325d, - 0xda000068, 0xcf800008, 0x90000000, 0xc4113277, 0xc41c000b, 0xc420000c, 0x11dc0002, 0x7de1c001, - 0x11dc0008, 0x29dc0001, 0x25140001, 0x9540002d, 0xd8400013, 0xcdc1334a, 0xcfc00013, 0x042c3000, - 0xcec13267, 0xc42d3246, 0xc4313245, 0xc4353267, 0xd8400013, 0xc425334d, 0x26640001, 0x9640fffe, - 0xc419334e, 0xc41d334f, 0xc4213350, 0xc4253351, 0x52ec0020, 0x1b680057, 0x7ef2c01a, 0x1b700213, - 0x1b740199, 0x46ec01b0, 0x7f6b400a, 0x7f73400a, 0xcfc00013, 0xcf400024, 0xd2c00025, 0xcd800026, - 0xcdc00026, 0xce000026, 0xce400026, 0x042c2000, 0xd8400027, 0xcec13267, 0xc42d3267, 0x96c00001, - 0xc41c000b, 0xc420000c, 0x11dc0002, 0x7de1c001, 0x11dc0008, 0xd8400013, 0xcdc1334a, 0xcfc00013, - 0x90000000, 0xc430000b, 0x33300002, 0x04240000, 0x9b000010, 0x1be000e8, 0x042c0000, 0xc0360001, - 0x04280004, 0xd8400013, 0xcec1c200, 0xc63124dc, 0x0aa80001, 0x7ef6c001, 0x7e724001, 0x97000001, - 0x9a80fff9, 0xc02ee000, 0xd8400013, 0xcec1c200, 0x90000000, 0x90000000, 0xc4253260, 0x7fc14001, - 0xc40d3249, 0x18cc003e, 0x98c00005, 0x194c1c03, 0xccc0003b, 0xc40c002d, 0x80000697, 0xc420004a, - 0x194c00e8, 0xccc0005e, 0xc40c004c, 0xc431326d, 0x27301fff, 0xce00005e, 0x7cf0c00d, 0x98c00003, - 0x8c0007e0, 0x95c00008, 0xc430001e, 0x1b301ff8, 0x2b300400, 0x2330003f, 0xcd400013, 0xcf01325b, - 0x90000000, 0xcd400013, 0xd801325b, 0xc411325d, 0x251001ef, 0xcd01325d, 0x25100007, 0x31100005, - 0x9900008e, 0xc40c0007, 0xd9000010, 0x8000075e, 0x202c007d, 0xcec1325b, 0xc4293265, 0xc4353254, - 0x26a9feff, 0xc4380004, 0xd8400008, 0x1374000b, 0xc40c000d, 0xd8000009, 0x1774000d, 0xd8400013, - 0xc41d30b8, 0xcfc00013, 0x95c00008, 0xc411325d, 0xd801325b, 0xccc00009, 0xcf800008, 0x251001ef, - 0xcd01325d, 0x90000000, 0xce813265, 0xcf400100, 0xc00ac006, 0xc00e0000, 0x28880700, 0x28cc0014, - 0x8c0006de, 0x14cc0010, 0x30d4000f, 0x04cc0001, 0x10cc0010, 0x28cc0014, 0x99400009, 0xd8400013, - 0xc41530b8, 0xcfc00013, 0xc4193265, 0x19980028, 0x99400003, 0x99800002, 0x800006c8, 0xcfc00013, - 0xc411325d, 0xd801325b, 0xcf800008, 0x251001ef, 0xcd01325d, 0x90000000, 0x15600008, 0xce000009, - 0xc8380023, 0xc4180081, 0x11a00002, 0x7fa38011, 0xc4100026, 0x05980008, 0x7d1a0002, 0x282c2002, - 0x3e280008, 0xcec00013, 0xc4300027, 0x042c0008, 0xd3800025, 0xcf000024, 0x202400d0, 0x7ca48001, - 0xcc800026, 0xccc00026, 0x28240006, 0xcc000026, 0x0a640001, 0x9a40fffe, 0x9a800004, 0x32280000, - 0x9a800002, 0x9a000000, 0xd8400027, 0x24d8003f, 0xd840003c, 0xcec0003a, 0xd8800013, 0xcd81a2a4, - 0x90000000, 
0xc41d325d, 0x25dc0007, 0xc40d3249, 0x18cc003e, 0x94c0000a, 0xc420004a, 0x194c00e8, - 0xccc0005e, 0xc40c004c, 0xc431326d, 0x27301fff, 0xce00005e, 0x7cf0c00d, 0x80000712, 0x194c1c03, - 0xccc0003b, 0xc40c002d, 0x05e80714, 0x86800000, 0x8000071c, 0x80000720, 0x80000747, 0x8000071d, - 0x800007c4, 0x80000732, 0x80000745, 0x80000744, 0x90000000, 0x98c00006, 0x8000072e, 0x90000000, - 0x98c00003, 0x8c0007e0, 0x95c0000c, 0xcd400013, 0xc4253265, 0x2a64008c, 0xce413265, 0xc430001e, - 0x1b301fe8, 0x2b300400, 0x2330003f, 0xd8013260, 0xcf01325b, 0x90000000, 0xc40c0007, 0xd9000010, - 0x04240000, 0x8000075e, 0x98c0fff1, 0x8c0007e0, 0x95c00002, 0x80000723, 0xcd400013, 0xc41f02f1, - 0x95c00004, 0xd8013247, 0xd801325d, 0x80000743, 0xd8813247, 0xd801325d, 0xc4100004, 0xd8400008, - 0xd8400013, 0xd88130b8, 0xcd000008, 0x90000000, 0x04100001, 0x98c0ffde, 0x8000072e, 0x98c00003, - 0x8c0007e0, 0x95c00012, 0xc4340004, 0xd8400008, 0x15600008, 0xc418000d, 0xce000009, 0xd8400013, - 0xd84131db, 0xcf400008, 0xcd800009, 0xc430001e, 0x1b301ff8, 0x2b300400, 0x2330003f, 0xcd400013, - 0xd8413260, 0xcf01325b, 0x90000000, 0xc40c0007, 0xd9000010, 0x04240000, 0xcd400013, 0x041c3000, - 0xcdc13267, 0xc41d3267, 0xc41d3265, 0x25dc8000, 0x95c00007, 0xc41c004a, 0x195800e8, 0xcd80005e, - 0xc418004c, 0xcd81326e, 0xcdc0005e, 0xc41d3265, 0x25dd7fff, 0xcdc13265, 0xc41d3246, 0xc4193245, - 0xc42d3267, 0x51e00020, 0x7e1a001a, 0x46200200, 0x04283247, 0x04300033, 0x1af80057, 0x1af40213, - 0x042c000c, 0x7f7b400a, 0x7f6f400a, 0xcf400024, 0xd2000025, 0xcd800026, 0xcdc00026, 0xc6990000, - 0x329c325d, 0x99c00008, 0x329c3269, 0x99c00006, 0x329c3267, 0x95c00005, 0xc01defff, 0x7d9d8009, - 0x8000078a, 0x25980000, 0x0b300001, 0x06a80001, 0xcd800026, 0x9b00fff2, 0xd8400027, 0xc43c0012, - 0x9bc0ffff, 0xcd400013, 0xd801325b, 0xc431325a, 0xc03e7ff0, 0x7f3f0009, 0xcf01325a, 0xc4313249, - 0x1f30001f, 0xcf013249, 0xc03e4000, 0xcfc13254, 0xcd400013, 0xd8013254, 0xc431325d, 0xd801324f, - 0xd8013255, 0xd8013247, 0xd801325d, 0x1b300028, 0x8c00120d, 0x8c001219, 0x8c001232, 0xc4380004, - 0xd8400008, 0xd8400013, 0x9900000d, 0xd88130b8, 0x9700000b, 0xc43d30b5, 0x1bf0003a, 0x9b000b80, - 0x203c003a, 0xc430000e, 0x27300700, 0x13300014, 0x2b300001, 0xcf0130b7, 0xcfc130b5, 0x46200008, - 0xcf400024, 0xd2000025, 0xd8000026, 0xd8400027, 0x043c2000, 0xcd400013, 0xcfc13267, 0xc43d3267, - 0x9bc00001, 0xccc00010, 0xcf800008, 0x90000000, 0xc4080007, 0xd9000010, 0xc4193260, 0x259c0003, - 0x31dc0003, 0x95c00014, 0x040c3000, 0xd8400008, 0xccc13267, 0xc40d3267, 0x18ec0057, 0x18e40213, - 0x18cc0199, 0x7cecc00a, 0x7ce4c00a, 0xc4193246, 0xc41d3245, 0x51980020, 0x7d9d801a, 0x8c000448, - 0xcd400013, 0x040c2000, 0xccc13267, 0xc40d3267, 0x94c00001, 0xcc800010, 0xd801325d, 0x90000000, - 0xc418000b, 0x31980002, 0x041c0000, 0x9980001c, 0x19580066, 0x15600008, 0x040c0000, 0xc0120001, - 0x11980003, 0x04240004, 0x7da18001, 0xc4200007, 0xc4340004, 0xd9000010, 0xd8400008, 0xd8400013, - 0xccc1c200, 0xc41d24db, 0x7cd0c001, 0x0a640001, 0x7dd9c005, 0x25dc0001, 0x99c00002, 0x9a40fff8, - 0xc418005e, 0x9580137b, 0xc00ee000, 0xd8400013, 0xccc1c200, 0xce000010, 0xcf400008, 0x90000000, - 0xd840004f, 0xc4113269, 0x19080070, 0x190c00e8, 0x2510003f, 0x2518000f, 0xcd813268, 0x05a80809, - 0x86800000, 0x8000080e, 0x8000080f, 0x80000898, 0x80000946, 0x800009e1, 0x80000a5a, 0x04a80811, - 0x86800000, 0x80000815, 0x80000834, 0x8000085e, 0x8000085e, 0x04341001, 0xcf400013, 0xc4380004, - 0xd8400008, 0xc42d3045, 0xcec1c091, 0x31300021, 0x9700000b, 0xd84002f1, 0xd8400013, 0xc43130b8, - 0x27300001, 0xc4293059, 0x56a8001f, 
0x7f2b000a, 0xcf800008, 0x9b000241, 0x8000084a, 0xcf400013, - 0xd8400008, 0xc43130b6, 0x9b000003, 0xc02f0001, 0xcec130b6, 0xc4252087, 0x5668001a, 0x26a80005, - 0x9a80fffd, 0xcf400013, 0xd80130b6, 0x8000084a, 0xc4380004, 0xd8400008, 0x04341001, 0xcf400013, - 0xc431ecaa, 0x27300080, 0x9b000010, 0xc02e0001, 0xcec130b6, 0xcf400013, 0xd80130b6, 0x31300021, - 0x9700000a, 0xd84002f1, 0xd8400013, 0xc43130b8, 0x27300001, 0xc4293059, 0x56a8001f, 0x7f2b000a, - 0xcf800008, 0x9b00021d, 0xdd410000, 0x040c0005, 0xd84802e9, 0x8c001a41, 0xc43b02f1, 0x9b800006, - 0xc4380004, 0xd8400008, 0xd8400013, 0xd88130b8, 0xcf800008, 0xcec80278, 0x56f00020, 0xcf080280, - 0x8c001608, 0xdc140000, 0xcd400013, 0xd8813247, 0xd80802e9, 0x8000085e, 0xcd400013, 0x31100011, - 0x950001fa, 0xc02e0001, 0x2aec0008, 0xc01c0020, 0xc0180001, 0xc00c0007, 0x11a40006, 0x7de6000a, - 0x10e40008, 0x7e26000a, 0x7e2e000a, 0xce000013, 0xc4113254, 0x1d10ffdf, 0x2110003e, 0xcd013254, - 0xd801324f, 0xd8013255, 0x1d10ff9e, 0xcd013254, 0xd8013247, 0xd801325d, 0xd801325e, 0xc0245301, - 0xce413249, 0xd801325f, 0xc425326c, 0xc0121fff, 0x29108eff, 0x7e524009, 0xce41326c, 0xc425325a, - 0xc0127ff0, 0x7e524009, 0xce41325a, 0xc425325b, 0xc0131fff, 0x7e524009, 0xce41325b, 0xd801326d, - 0xd801326e, 0xd8013279, 0x94c00003, 0x08cc0001, 0x80000866, 0xc00c0007, 0x95800003, 0x09980001, - 0x80000866, 0xc0100010, 0x7dd2400c, 0x9a400004, 0xc0180003, 0x7dd1c002, 0x80000866, 0x80000a5a, - 0x04a8089a, 0x86800000, 0x8000089e, 0x800008fa, 0x80000945, 0x80000945, 0x31300022, 0x97000007, - 0xc4380004, 0xd8400008, 0xd8400013, 0xc43130b8, 0x27300001, 0xcf800008, 0xcd400013, 0x04183000, - 0xcd813267, 0xc4113246, 0xc4193245, 0x51100020, 0x7d91801a, 0x459801e0, 0xc4313267, 0x2738000f, - 0x1b342010, 0x172c000c, 0x26ec0800, 0x1b30c012, 0x7ef7400a, 0x7f37000a, 0x2b300000, 0xcf00001c, - 0xd180001e, 0xd8400021, 0xc42c000f, 0x9ac0ffff, 0xc8300011, 0x97000036, 0x45980008, 0xd180001e, - 0xd8400021, 0xc42c000f, 0x9ac0ffff, 0xc8340011, 0x9740002f, 0xc43c0004, 0xd8400008, 0xd8400013, - 0x13b80001, 0xc79d3300, 0xc7a13301, 0x96000001, 0xd8393300, 0xc0260001, 0xce793301, 0xc424005e, - 0x964012a4, 0x7c028009, 0x9740001c, 0x27580001, 0x99800004, 0x57740001, 0x06a80400, 0x800008d2, - 0xc4180006, 0x9980ffff, 0x29640001, 0xce40001a, 0x242c0000, 0x06ec0400, 0x57740001, 0x27580001, - 0x9980fffd, 0xc02620c0, 0xce41c078, 0xce81c080, 0xcc01c081, 0xcf01c082, 0x57240020, 0xce41c083, - 0xc0260400, 0x7e6e400a, 0xce41c084, 0x7eae8001, 0x7f2f0011, 0x800008d2, 0xc4180006, 0x9980ffff, - 0xcdf93300, 0xce393301, 0xcfc00008, 0xcd400013, 0xc43c0004, 0xd8400008, 0x04182000, 0xcd813267, - 0xcfc00008, 0x80000903, 0x31240022, 0x96400008, 0x04100001, 0xc4380004, 0xd8400008, 0xd8400013, - 0xc43130b8, 0x27300001, 0xcf800008, 0xc4af0280, 0xc4b30278, 0x52ec0020, 0x7ef2c01a, 0x7ec30011, - 0x32f80000, 0x9b800011, 0x043c0020, 0x04280000, 0x67180001, 0x0bfc0001, 0x57300001, 0x95800006, - 0x8c001628, 0x9a400003, 0xd981325d, 0x80000915, 0xd9c1325d, 0x06a80001, 0x9bc0fff6, 0x7f818001, - 0x8c001606, 0x7d838001, 0x94800010, 0xcd400013, 0xc41d3259, 0xc421325a, 0x16240014, 0x12640014, - 0x1a2801f0, 0x12a80010, 0x2620ffff, 0x7e2a000a, 0x7de1c001, 0x7e5e400a, 0x9b800002, 0x2264003f, - 0xce41325a, 0xd8013259, 0xc40c0007, 0xd9000010, 0x8c00075e, 0xc4af0228, 0x043c0000, 0x66d80001, - 0x95800010, 0x04300002, 0x1330000d, 0x13f40014, 0x7f73400a, 0xcf400013, 0x04380040, 0xcf80001b, - 0xd8400021, 0xc438000f, 0x9b80ffff, 0x04380060, 0xcf80001b, 0xd8400021, 0xc438000f, 0x9b80ffff, - 0x07fc0001, 0x56ec0001, 0x33e80010, 0x9680ffec, 0x80000a5a, 
0x80000a5a, 0x04a80948, 0x86800000, - 0x8000094c, 0x8000099b, 0x800009e0, 0x800009e0, 0xc43c0004, 0xd8400008, 0xcd400013, 0x04183000, - 0xcd813267, 0xc4113246, 0xc4193245, 0x51100020, 0x7d91801a, 0x459801e0, 0xc4313267, 0x2738000f, - 0x1b342010, 0x172c000c, 0x26ec0800, 0x1b30c012, 0x7ef7400a, 0x7f37000a, 0x2b300000, 0xcf00001c, - 0xd180001e, 0xd8400021, 0xc42c000f, 0x9ac0ffff, 0xc8300011, 0x97000033, 0x45980008, 0xd180001e, - 0xd8400021, 0xc42c000f, 0x9ac0ffff, 0xc8340011, 0x9740002c, 0xd8400013, 0x13b80001, 0xc79d3300, - 0xc7a13301, 0x96000001, 0xd8393300, 0xc0260001, 0xce793301, 0xc424005e, 0x964011fe, 0x7c028009, - 0x9740001c, 0x27580001, 0x99800004, 0x57740001, 0x06a80400, 0x80000978, 0xc4180006, 0x9980ffff, - 0x29640001, 0xce40001a, 0x242c0000, 0x06ec0400, 0x57740001, 0x27580001, 0x9980fffd, 0xc0260010, - 0xce41c078, 0xcf01c080, 0x57240020, 0xce41c081, 0xce81c082, 0xcc01c083, 0xc0260800, 0x7e6e400a, - 0xce41c084, 0x7eae8001, 0x7f2f0011, 0x80000978, 0xc4180006, 0x9980ffff, 0xcdf93300, 0xce393301, - 0x04182000, 0xcd813267, 0xcfc00008, 0xcd400013, 0xc4193246, 0xc41d3245, 0x51980020, 0x7dda801a, - 0x7d41c001, 0x7e838011, 0xd84802e9, 0x8c001802, 0x469c0390, 0xc4313267, 0x04183000, 0xcd813267, - 0x1b342010, 0x172c000c, 0x26ec0800, 0x1b30c012, 0x7ef7400a, 0x7f37000a, 0x2b300000, 0xcf00001c, - 0x45dc0004, 0xd1c0001e, 0xd8400021, 0xc418000f, 0x9980ffff, 0xc4200011, 0x45dc0004, 0xd1c0001e, - 0xd8400021, 0xc418000f, 0x9980ffff, 0xc4240011, 0x45dc0004, 0xd1c0001e, 0xd8400021, 0xc418000f, - 0x9980ffff, 0xc4280011, 0x45dc0004, 0xd1c0001e, 0xd8400021, 0xc418000f, 0x9980ffff, 0xc42c0011, - 0x45dc0004, 0xd1c0001e, 0xd8400021, 0xc418000f, 0x9980ffff, 0xc4300011, 0x45dc0004, 0xd1c0001e, - 0xd8400021, 0xc418000f, 0x9980ffff, 0xc4340011, 0x45dc0004, 0xd1c0001e, 0xd8400021, 0xc418000f, - 0x9980ffff, 0xc4380011, 0xcd400013, 0x04182000, 0xcd813267, 0x043c0001, 0x8c0014df, 0x80000a5a, - 0x80000a5a, 0x31280014, 0xce8802ef, 0x9a800062, 0x31280034, 0x9a800060, 0x04a809e8, 0x86800000, - 0x800009ec, 0x80000a45, 0x80000a59, 0x80000a59, 0xcd400013, 0xc4113246, 0xc4193245, 0x51100020, - 0x7d91801a, 0x45980400, 0xc4b30258, 0xc4a70250, 0x53300020, 0x7e72401a, 0xc4313267, 0x1b342010, - 0x172c000c, 0x26ec0800, 0x1b30c012, 0x7ef7400a, 0x7f37000a, 0x2b300000, 0xcf00001c, 0x042c0020, - 0x66740001, 0x97400041, 0xcd400013, 0x04383000, 0xcf813267, 0xc4393267, 0x9b800001, 0xd180001e, - 0xd8400021, 0xc438000f, 0x9b80ffff, 0xc4300011, 0x1b38007e, 0x33b40003, 0x9b400003, 0x4598001c, - 0x9740002f, 0x45980004, 0xd180001e, 0xd8400021, 0xc438000f, 0x9b80ffff, 0xc40c0011, 0x45980004, - 0xd180001e, 0xd8400021, 0xc438000f, 0x9b80ffff, 0xc4100011, 0x45980004, 0xd180001e, 0xd8400021, - 0xc438000f, 0x9b80ffff, 0xc4340011, 0xcf4002eb, 0x45980004, 0xd180001e, 0xd8400021, 0xc438000f, - 0x9b80ffff, 0xc4340011, 0xcf4002ec, 0x45980004, 0xd180001e, 0xd8400021, 0xc438000f, 0x9b80ffff, - 0xc4340011, 0xcf4002ed, 0x45980004, 0xd180001e, 0xd8400021, 0xc438000f, 0x9b80ffff, 0xc4340011, - 0xcf4002ee, 0x45980004, 0xcd400013, 0x04382000, 0xcf813267, 0xd84802e9, 0x8c001715, 0xcd400013, - 0x04382000, 0xcf813267, 0x56640001, 0x0aec0001, 0x9ac0ffbc, 0xc4380004, 0xd8400008, 0x04341001, - 0xcf400013, 0x94800005, 0xc431ecaa, 0x27300080, 0x97000002, 0x80000a55, 0xc43130b6, 0x233c0032, - 0xcfc130b6, 0xcf400013, 0xcf0130b6, 0xc49302ef, 0x99000003, 0xcd400013, 0xd8413247, 0xcf800008, - 0x80000a5a, 0x80000a5a, 0xcd400013, 0x04180001, 0x5198001f, 0xcd813268, 0xc4193269, 0x2598000f, - 0x9980fffe, 0xd80002f1, 0xcd400013, 0xd8013268, 0xd800004f, 0x90000000, 0xcd400013, 
0x04380001, - 0x53b8001f, 0x7db9801a, 0xcd813268, 0x80000a5e, 0xd8400029, 0xc40c005e, 0x94c01106, 0xd8800013, - 0xcc412e01, 0xcc412e02, 0xcc412e03, 0xcc412e00, 0x80000aa7, 0xd8400029, 0xc40c005e, 0x94c010fd, - 0x7c40c001, 0x50640020, 0x7ce4c01a, 0xd0c00072, 0xc80c0072, 0x58e801fc, 0x12a80009, 0x2aa80000, - 0xd0c0001e, 0xce80001c, 0xd8400021, 0xc424000f, 0x9a40ffff, 0x04240010, 0x18dc01e2, 0x7e5e4002, - 0x3e5c0003, 0x3e540002, 0x95c00006, 0xc8180011, 0xc8100011, 0xc8100011, 0x55140020, 0x80000aa2, - 0x9540000a, 0xc8180011, 0x44cc0008, 0x55900020, 0xd0c0001e, 0xd8400021, 0xc424000f, 0x9a40ffff, - 0xc4140011, 0x80000aa2, 0x44cc0004, 0xc4180011, 0xd0c0001e, 0xd8400021, 0xc424000f, 0x9a40ffff, - 0xc8100011, 0x55140020, 0xd8800013, 0xcd812e01, 0xcd012e02, 0xcd412e03, 0xcc412e00, 0xc428000e, - 0x2aa80008, 0xce800013, 0xc4253249, 0x2264003f, 0xce413249, 0xce800013, 0xc4253249, 0x96400001, - 0xd800002a, 0xc410001a, 0xc40c0021, 0xc4140028, 0x95000005, 0x1e64001f, 0xce800013, 0xce413249, - 0x80001b70, 0x14d00010, 0xc4180030, 0xc41c0007, 0x99000004, 0x99400009, 0x9980000c, 0x80000ab1, - 0xccc00037, 0x8c000190, 0xc420001c, 0xd8000032, 0x9a0010ac, 0x80000aa7, 0xd880003f, 0x95c00002, - 0xd8c0003f, 0x80001082, 0xd8800040, 0x95c00002, 0xd8c00040, 0x800010de, 0xc010ffff, 0x18d403f7, - 0x7d0cc009, 0xc41b0367, 0x7d958004, 0x7d85800a, 0xdc1e0000, 0x90000000, 0xc424000b, 0x32640002, - 0x7c40c001, 0x18d001fc, 0x05280adc, 0x86800000, 0x80000af1, 0x80000adf, 0x80000ae7, 0x8c000ace, - 0xd8c00013, 0x96400002, 0xd8400013, 0xcd8d2000, 0x99c00010, 0x7c408001, 0x88000000, 0x18d803f7, - 0xc010ffff, 0x7d0cc009, 0x04140000, 0x11940014, 0x29544001, 0x9a400002, 0x29544003, 0xcd400013, - 0x80000af4, 0xd8c00013, 0x96400002, 0xd8400013, 0xd44d2000, 0x7c408001, 0x88000000, 0xc424000b, - 0x32640002, 0x7c40c001, 0xd8c00013, 0x96400002, 0xd8400013, 0xd44dc000, 0x7c408001, 0x88000000, - 0x7c40c001, 0x18d0003c, 0x95000006, 0x8c000ace, 0xd8800013, 0xcd8d2c00, 0x99c00003, 0x80000b0a, - 0xd8800013, 0xd44d2c00, 0x7c408001, 0x88000000, 0x7c40c001, 0x28148004, 0x24d800ff, 0xccc00019, - 0xcd400013, 0xd4593240, 0x7c408001, 0x88000000, 0xd8400029, 0xc40c005e, 0x94c0105e, 0x7c410001, - 0x50540020, 0x7c418001, 0x2198003f, 0x199c0034, 0xc40c0007, 0x95c00028, 0xc428000e, 0x2aa80008, - 0xce800013, 0xc42d324f, 0xc4313255, 0x7ef3400c, 0x9b400021, 0xd800002a, 0x80001b70, 0xc40c0007, - 0x14e80001, 0x9a8000af, 0xd9000010, 0x041c0002, 0x042c01c8, 0x8c000d61, 0xccc00010, 0xd8400029, - 0xc40c005e, 0x94c01043, 0x7c410001, 0x50540020, 0x7c418001, 0x18a01fe8, 0x3620005c, 0x9a00000e, - 0x2464003f, 0xd8400013, 0xc6290ce7, 0x16ac001f, 0x96c00004, 0x26ac003f, 0x7ee6c00d, 0x96c00005, - 0x06200001, 0x2620000f, 0x9a00fff8, 0x8000016a, 0xce000367, 0xc424005e, 0x9640102e, 0xc428000e, - 0x199c0037, 0x19a00035, 0x2aa80008, 0xce800013, 0x95c0005d, 0xd800002a, 0xc42d3256, 0xc431325a, - 0x2330003f, 0x16f8001f, 0x9780000d, 0xc4253248, 0xc035f0ff, 0x7e764009, 0x19b401f8, 0x13740008, - 0x7e76400a, 0xce800013, 0xce413248, 0xcf01325a, 0xce800013, 0xc431325a, 0x97000001, 0x7d15001a, - 0xd1000072, 0xc8100072, 0x55140020, 0x199c0034, 0xd8400010, 0xd8400029, 0x9b800004, 0x1ae4003e, - 0xce400008, 0x80000b7c, 0xc4353254, 0x16a80008, 0x1aec003c, 0x19a4003f, 0x12a80015, 0x12ec001f, - 0x1374000b, 0x7eae800a, 0xc02e4000, 0x1774000d, 0x7eae800a, 0xce400008, 0x7f6b400a, 0x95c00005, - 0xc43d3248, 0x1bfc01e8, 0x13fc0018, 0x7dbd800a, 0x1d98ff15, 0x592c00fc, 0xcd80000a, 0x12e00016, - 0x7da1800a, 0x592c007e, 0x12e00015, 0x7da1800a, 0xd1000001, 0xcd800001, 0x11a0000c, 0x1264001e, - 0x1620000c, 
0x7e26000a, 0x7e32000a, 0x12e4001b, 0x7e26000a, 0x5924007e, 0x12640017, 0x7e26000a, - 0x19a4003c, 0x12640018, 0x7e26000a, 0xd800002a, 0xce01325a, 0xcd013257, 0xcd413258, 0xc429325a, - 0xc40c005e, 0x94c00fdb, 0x96800001, 0x95c00003, 0x7c40c001, 0x7c410001, 0x9780f5ca, 0xcf400100, - 0xc40c0007, 0xd9000010, 0x8c00120d, 0x8c001219, 0x8c001232, 0xccc00010, 0x8c001b6d, 0x7c408001, - 0x88000000, 0xc42d324e, 0xc431324d, 0x52ec0020, 0x7ef2c01a, 0xc435324f, 0xc4293256, 0x52ec0008, - 0x07740003, 0x04240002, 0x269c003f, 0x7e5e4004, 0x7f67000f, 0x97000003, 0x7f674002, 0x0b740001, - 0x53740002, 0x7ef6c011, 0x1ab42010, 0x1ab8c006, 0x16a8000c, 0x26a80800, 0x2b740000, 0x7f7b400a, - 0x7f6b400a, 0xcf40001c, 0xd2c0001e, 0xd8400021, 0xc438000f, 0x9b80ffff, 0xc4180011, 0x9a000003, - 0x8c000bec, 0x80000b47, 0xc42c001d, 0xc4313256, 0x1b34060b, 0x1b300077, 0x7f37000a, 0x13300017, - 0x04340100, 0x26ec00ff, 0xc03a8004, 0x7ef6c00a, 0x7f3b000a, 0x7ef2c00a, 0xcec1325b, 0x80000c16, - 0xc40c0032, 0xc410001d, 0x28cc0008, 0xccc00013, 0xc415325b, 0x7c418001, 0x7c418001, 0x18580037, - 0x251000ff, 0xc421325d, 0x262001ef, 0xce01325d, 0x99800004, 0x7d15400a, 0xcd41325b, 0x80000168, - 0x1d54001f, 0xcd41325b, 0x7c408001, 0x88000000, 0xc428000b, 0xc42c000c, 0x12a80001, 0x26a80004, - 0x7eae800a, 0xc40c0021, 0xc4340028, 0x14f00010, 0xc4380030, 0xc43c0007, 0xcd280200, 0xcd680208, - 0xcda80210, 0x9b00000c, 0x9b400014, 0x9b800017, 0xc428000b, 0xc42c000c, 0x12a80001, 0x26a80004, - 0x7eae800a, 0xc6930200, 0xc6970208, 0xc69b0210, 0x90000000, 0x17300001, 0x9b000005, 0xccc00037, - 0x8c000190, 0xd8000032, 0x90000000, 0xd8000028, 0xd800002b, 0x80000168, 0xd900003f, 0x97c00002, - 0xd940003f, 0x80001082, 0xd9000040, 0x97c00002, 0xd9400040, 0x800010de, 0xc40c0021, 0x14fc0011, - 0x24f800ff, 0x33b80001, 0x97c0fffc, 0x9b800007, 0xccc00037, 0x8c000190, 0xd8000032, 0xd8000028, - 0xd800002b, 0x80001b70, 0xc4380004, 0xd8400008, 0xd8400013, 0xd88130b8, 0x04100000, 0x04140000, - 0xc418000e, 0x29980008, 0x7d83c001, 0xcd800013, 0xc4093249, 0x1888003e, 0x94800020, 0xd8400074, - 0x8c000671, 0x9a400009, 0xc418000e, 0x29980008, 0xcd800013, 0xc419324c, 0x259c0001, 0x1598001f, - 0x95c00016, 0x95800015, 0x99000003, 0xd8400036, 0x04100001, 0xc40c0021, 0x14d80011, 0x24e000ff, - 0x321c0002, 0x32200001, 0x9580ffee, 0x99c00014, 0x96000004, 0xccc00037, 0x04140001, 0x80000c30, - 0x9480000a, 0xd8000074, 0xc418005e, 0x95800f29, 0xcf800008, 0x80000c16, 0x94800004, 0xd8000074, - 0xc418005e, 0x95800f23, 0xd9c00036, 0x99400002, 0xccc00037, 0xcf800008, 0x80000c16, 0x94800004, - 0xd8000074, 0xc418005e, 0x95800f1a, 0xccc00037, 0xd8800036, 0x80001b70, 0x041c0003, 0x042c01c8, - 0x8c000d61, 0xc4200007, 0xc40c0077, 0x94c00001, 0x7c418001, 0xc428000e, 0x9600f502, 0x0a200001, - 0x98c0f500, 0x2aa80008, 0xce000010, 0x9a000f05, 0xce800013, 0xc431325a, 0xc42d3256, 0x1f30001f, - 0x16e4001f, 0xcf01325a, 0xc431325a, 0x97000001, 0x9640f4f4, 0xc434000b, 0x33740002, 0x9b40f4f1, - 0xc4353254, 0x16a80008, 0x1aec003c, 0x12a80015, 0x12ec001f, 0x1374000b, 0x7eae800a, 0xc02e4000, - 0x1774000d, 0x7eae800a, 0x7f6b400a, 0xcf400100, 0x12780001, 0x2bb80001, 0xc00ac005, 0xc00e0002, - 0x28cc8000, 0x28884900, 0x28cc0014, 0x80000ff3, 0xc43c0007, 0x7c40c001, 0x17fc0001, 0xd8400013, - 0x9bc00004, 0xd8400029, 0xc424005e, 0x96400ee1, 0xcc41c40a, 0xcc41c40c, 0xcc41c40d, 0x7c414001, - 0x24d0007f, 0x15580010, 0x255400ff, 0xcd01c411, 0xcd81c40f, 0xcd41c40e, 0xcc41c410, 0x7c414001, - 0x7c418001, 0x04200000, 0x18e80033, 0x18ec0034, 0xcc41c414, 0xcc41c415, 0xcd81c413, 0xcd41c412, - 0x18dc0032, 0x7c030011, 0x7c038011, 
0x95c00027, 0x96c00002, 0xc431c417, 0xc435c416, 0x96800004, - 0x96c00002, 0xc439c419, 0xc43dc418, 0xc41c000e, 0x29dc0008, 0xcdc00013, 0xcf413261, 0x96c00002, - 0xcf013262, 0x96800004, 0xcfc13263, 0x96c00002, 0xcf813264, 0x18dc0030, 0xc43c0007, 0x95c00017, - 0x17fc0001, 0x9ac00005, 0x7d77000c, 0x9bc00015, 0x9700000a, 0x80000cd6, 0x51b80020, 0x53300020, - 0x7f97801a, 0x7f37001a, 0x7f3b000c, 0x9bc0000d, 0x97800002, 0x80000cd6, 0x9a000018, 0xd8400013, - 0x28200001, 0x80000ca7, 0x18dc0031, 0x95c00003, 0xc435c40b, 0x9740fffd, 0xd800002a, 0x80001b70, - 0xc4280032, 0x2aa80008, 0xce800013, 0xc40d325b, 0x97000002, 0x800012c2, 0xc438001d, 0x1bb81ff0, - 0x7f8cc00a, 0xccc1325b, 0xc411325d, 0x251001ef, 0xcd01325d, 0x80001b70, 0xc428000e, 0xc43c0007, - 0x2aa80008, 0xc438001d, 0xce800013, 0x13f4000c, 0x9bc00006, 0xc43d3256, 0x1bf0060b, 0x1bfc0077, - 0x7ff3c00a, 0x80000cf4, 0xc43d325a, 0x1bfc0677, 0x13fc0017, 0x04300100, 0x1bb81fe8, 0x7f73400a, - 0xc032800b, 0x7fb7800a, 0x7ff3c00a, 0x7ffbc00a, 0xcfc1325b, 0x80000c16, 0xc43c0007, 0x7c40c001, - 0x18d42011, 0x17fc0001, 0x18d001e8, 0x24cc007f, 0x7cd4c00a, 0x9bc00004, 0xd8400029, 0xc428005e, - 0x96800e6c, 0x7c414001, 0x50580020, 0x7d59401a, 0xd1400072, 0xc8140072, 0x596001fc, 0x12200009, - 0x7ce0c00a, 0x7c418001, 0x505c0020, 0x7d9d801a, 0x7c41c001, 0x50600020, 0x7de1c01a, 0x7c420001, - 0xccc0001b, 0xd140001d, 0xd180001f, 0xd1c00020, 0xd8400021, 0x95000010, 0x04300000, 0xc428000f, - 0x9a80ffff, 0xc8240010, 0x7e5e800c, 0x9bc00015, 0x9a80000c, 0x9b000024, 0x28300001, 0x122c0004, - 0x06ec0001, 0x0aec0001, 0x9ac0ffff, 0xd8400021, 0x80000d1f, 0xc428000f, 0x9a80ffff, 0xc8240010, - 0x566c0020, 0xc428000e, 0x2aa80008, 0xce800013, 0xce413261, 0xcec13262, 0xd800002a, 0x80001b70, - 0xc4340032, 0x2b740008, 0xcf400013, 0xc40d325b, 0x96800005, 0x566c0020, 0xce413261, 0xcec13262, - 0x800012c2, 0xc438001d, 0x1bb81fe8, 0x7f8cc00a, 0xccc1325b, 0xc411325d, 0x251001ef, 0xcd01325d, - 0x80001b70, 0xc43c0007, 0xc438001d, 0xc428000e, 0x2aa80008, 0xce800013, 0x13f4000c, 0x9bc00006, - 0xc43d3256, 0x1bf0060b, 0x1bfc0077, 0x7ff3c00a, 0x80000d57, 0xc43d325a, 0x1bfc0677, 0x13fc0017, - 0x04300100, 0x1bb81fe8, 0x7f73400a, 0xc0328009, 0x7fb7800a, 0x7ff3c00a, 0x7ffbc00a, 0xcfc1325b, - 0x80000c16, 0xc43c000e, 0x2bfc0008, 0xcfc00013, 0xc4253246, 0xc4113245, 0x04143000, 0xcd413267, - 0x52640020, 0x7e51001a, 0xc4153267, 0x7d2d0011, 0x19640057, 0x19580213, 0x19600199, 0x7da6400a, - 0x7e26400a, 0xd1000025, 0xce400024, 0xcdc00026, 0xd8400027, 0x04142000, 0xcfc00013, 0xcd413267, - 0xc4153267, 0x99400001, 0x90000000, 0x7c40c001, 0x18d001e8, 0x18d40030, 0x18d80034, 0x05280d83, - 0x7c420001, 0x7c424001, 0x86800000, 0x80000d8a, 0x8000016a, 0x80000d95, 0x80000db1, 0x8000016a, - 0x80000d95, 0x80000dbc, 0x11540010, 0x7e010001, 0x8c00187c, 0x7d75400a, 0xcd400013, 0xd4610000, - 0x9580f3d8, 0xc439c040, 0x97800001, 0x7c408001, 0x88000000, 0xd8000016, 0x526c0020, 0x18e80058, - 0x7e2ec01a, 0xd2c00072, 0xc82c0072, 0x5ae0073a, 0x7ea2800a, 0x9940000a, 0xce800024, 0xd2c00025, - 0xd4400026, 0xd8400027, 0x9580f3c6, 0xc4380012, 0x9b80ffff, 0x7c408001, 0x88000000, 0xdc3a0000, - 0x0bb80001, 0xce800024, 0xd2c00025, 0xcc400026, 0xd8400027, 0x9b80fffb, 0x9980fff5, 0x7c408001, - 0x88000000, 0xc02a0001, 0x2aa80001, 0x16200002, 0xce800013, 0xce01c405, 0xd441c406, 0x9580f3b1, - 0xc439c409, 0x97800001, 0x7c408001, 0x88000000, 0xc424000b, 0x32640002, 0x9a40000b, 0x11540010, - 0x29540002, 0xcd400013, 0xd4610000, 0x9580f3a5, 0xd8400013, 0xc439c040, 0x97800001, 0x7c408001, - 0x88000000, 0xd4400078, 0x80000168, 0xd8400029, 0xc40c005e, 
0x94c00da7, 0x7c40c001, 0x50500020, - 0x7cd0c01a, 0xd0c00072, 0xc8280072, 0x5aac007e, 0x12d80017, 0x7c41c001, 0x7d9d800a, 0x56a00020, - 0x2620ffff, 0x7da1800a, 0x51980020, 0x7e82400a, 0x7e58c01a, 0x19d4003d, 0x28182002, 0x99400030, - 0x8c00104f, 0xc430000d, 0xc4340035, 0xd800002a, 0xcd800013, 0xc8140023, 0xc4180081, 0x13300005, - 0xc011000f, 0xc4240004, 0x11a00002, 0x7c908009, 0x12640004, 0x7d614011, 0xc4100026, 0x05980008, - 0x7ca4800a, 0x7d1a0002, 0x7cb0800a, 0x3e280008, 0x20880188, 0x54ec0020, 0x7cb4800a, 0xc4300027, - 0x04380008, 0xd1400025, 0xcf000024, 0x20240090, 0x7ca48001, 0xcc800026, 0xccc00026, 0xcec00026, - 0xcec00026, 0x28240004, 0xcc000026, 0x0a640001, 0x9a40fffe, 0x9a800005, 0x32280000, 0x9a800002, - 0x9a000000, 0x7c018001, 0xd8400027, 0xd8000016, 0xcf80003a, 0xd901a2a4, 0x80001037, 0xc418000e, - 0x29980008, 0xcd800013, 0xc421326c, 0x1624001f, 0x9a40fffe, 0xd841325f, 0xd8800033, 0xc43c0009, - 0x27fc0004, 0x97c0fffe, 0xd8000039, 0xd0c00038, 0xc43c0022, 0x9bc0ffff, 0xd8800034, 0xc429325f, - 0x26ac0001, 0x9ac0fffe, 0x26ac0002, 0x96c00003, 0xd800002a, 0x80001b70, 0xc43c0007, 0xc430001e, - 0xd8800033, 0x13f4000c, 0x1b301ff0, 0x2b300300, 0x2330003f, 0x7f37000a, 0x9680000b, 0xc43c0009, - 0x27fc0004, 0x97c0fffe, 0xd8400039, 0xd0c00038, 0xc43c0022, 0x9bc0ffff, 0xcf01325b, 0xd8800034, - 0x80000c16, 0xd8800034, 0x8c0001a2, 0x80001b70, 0xcc80003b, 0x24b00008, 0xc418000e, 0x1330000a, - 0x18ac0024, 0x2b304000, 0x7c40c001, 0xcec00008, 0x18a800e5, 0x1d980008, 0x12a80008, 0x7da9800a, - 0x29980008, 0xcd800013, 0xc4113249, 0x1910003e, 0x99000002, 0xd840003d, 0x7c410001, 0xd4400078, - 0x51100020, 0xcf01326c, 0x7cd0c01a, 0xc421326c, 0x12a80014, 0x2220003f, 0x7e2a000a, 0xcd800013, - 0xce01326c, 0xd8800033, 0xc43c0009, 0x27fc0004, 0x97c0fffe, 0xd8000039, 0xd0c00038, 0xc43c0022, - 0x9bc0ffff, 0xd8800034, 0x80001190, 0x7c40c001, 0x18dc003d, 0x95c00004, 0x041c0001, 0x042c01c8, - 0x8c000d61, 0x18d40030, 0x18d001e8, 0x18fc0034, 0x24e8000f, 0x06a80e71, 0x7c418001, 0x7c41c001, - 0x86800000, 0x80000edd, 0x80000e91, 0x80000e91, 0x80000ea1, 0x80000eaa, 0x80000e7c, 0x80000e7f, - 0x80000e7f, 0x80000e87, 0x80000e8f, 0x8000016a, 0x51dc0020, 0x7d9e001a, 0x80000ee6, 0xc420000e, - 0x2a200008, 0xce000013, 0xc4213262, 0xc4253261, 0x52200020, 0x7e26001a, 0x80000ee6, 0xc420000e, - 0x2a200008, 0xce000013, 0xc4213264, 0xc4253263, 0x52200020, 0x7e26001a, 0x80000ee6, 0xc820001f, - 0x80000ee6, 0x18e82005, 0x51e00020, 0x2aa80000, 0x7da1801a, 0xd1800072, 0xc8180072, 0x59a001fc, - 0x12200009, 0x7ea2800a, 0xce80001c, 0xd180001e, 0xd8400021, 0xc428000f, 0x9a80ffff, 0xc8200011, - 0x80000ee6, 0x15980002, 0xd8400013, 0xcd81c400, 0xc421c401, 0x95400041, 0xc425c401, 0x52640020, - 0x7e26001a, 0x80000ee6, 0x31ac2580, 0x9ac00011, 0x31ac260c, 0x9ac0000f, 0x31ac0800, 0x9ac0000d, - 0x31ac0828, 0x9ac0000b, 0x31ac2440, 0x9ac00009, 0x31ac2390, 0x9ac00007, 0x31ac0093, 0x9ac00005, - 0x31ac31dc, 0x9ac00003, 0x31ac31e6, 0x96c00004, 0xc4340004, 0xd8400008, 0x80000ede, 0x39ac7c06, - 0x3db07c00, 0x9ac00003, 0x97000002, 0x80000ebc, 0x39acc337, 0x3db0c330, 0x9ac00003, 0x97000002, - 0x80000ebc, 0x39acc335, 0x3db0c336, 0x9ac00003, 0x97000002, 0x80000ebc, 0x39ac9002, 0x3db09001, - 0x9ac00003, 0x97000002, 0x80000ebc, 0x39ac9012, 0x3db09011, 0x9ac00003, 0x97000002, 0x80000ebc, - 0x39acec70, 0x3db0ec6f, 0x9ac00003, 0x97000002, 0x80000ebc, 0xc4340004, 0xd8400013, 0xc5a10000, - 0x95400005, 0x05980001, 0xc5a50000, 0x52640020, 0x7e26001a, 0xcf400008, 0x05280eea, 0x7c418001, - 0x7c41c001, 0x86800000, 0x80000ef1, 0x8000016a, 0x80000efe, 0x80000f11, 0x80000f2e, 
0x80000efe, - 0x80000f1f, 0xc4340004, 0xd8400013, 0xce190000, 0x95400005, 0x05980001, 0x56200020, 0xce190000, - 0xcf400008, 0x97c0f26f, 0xc439c040, 0x97800001, 0x7c408001, 0x88000000, 0x51ec0020, 0x18e80058, - 0x7daec01a, 0xd2c00072, 0xc82c0072, 0x5af8073a, 0x7eba800a, 0xd2c00025, 0xce800024, 0xce000026, - 0x95400003, 0x56240020, 0xce400026, 0xd8400027, 0x97c0f25c, 0xc4380012, 0x9b80ffff, 0x7c408001, - 0x88000000, 0xc02a0001, 0x2aa80001, 0x15980002, 0xce800013, 0xcd81c405, 0xce01c406, 0x95400003, - 0x56240020, 0xce41c406, 0x97c0f24e, 0xc439c409, 0x97800001, 0x7c408001, 0x88000000, 0xc424000b, - 0x32640002, 0x9a40f247, 0xd8800013, 0xce190000, 0x95400004, 0x05980001, 0x56200020, 0xce190000, - 0x97c0f240, 0xd8400013, 0xc439c040, 0x97800001, 0x7c408001, 0x88000000, 0x31ac2580, 0x9ac00011, - 0x31ac260c, 0x9ac0000f, 0x31ac0800, 0x9ac0000d, 0x31ac0828, 0x9ac0000b, 0x31ac2440, 0x9ac00009, - 0x31ac2390, 0x9ac00007, 0x31ac0093, 0x9ac00005, 0x31ac31dc, 0x9ac00003, 0x31ac31e6, 0x96c00004, - 0xc4340004, 0xd8400008, 0x80000ef2, 0x39ac7c06, 0x3db07c00, 0x9ac00003, 0x97000002, 0x80000f40, - 0x39acc337, 0x3db0c330, 0x9ac00003, 0x97000002, 0x80000f40, 0x39acc335, 0x3db0c336, 0x9ac00003, - 0x97000002, 0x80000f40, 0x39acec70, 0x3db0ec6f, 0x9ac00003, 0x97000002, 0x80000f40, 0x39ac9002, - 0x3db09002, 0x9ac00003, 0x97000002, 0x80000f40, 0x39ac9012, 0x3db09012, 0x9ac00003, 0x97000002, - 0x80000f40, 0x80000ef1, 0xc40c0006, 0x98c0ffff, 0x7c40c001, 0x7c410001, 0x7c414001, 0x7c418001, - 0x7c41c001, 0x7c43c001, 0x95c00001, 0xc434000e, 0x2b740008, 0x2b780001, 0xcf400013, 0xd8c1325e, - 0xcf80001a, 0xd8400013, 0x7c034001, 0x7c038001, 0x18e0007d, 0x32240003, 0x9a400006, 0x32240000, - 0x9a400004, 0xcd01c080, 0xcd41c081, 0x80000f88, 0x51640020, 0x7e52401a, 0xd2400072, 0xc8280072, - 0xce81c080, 0x56ac0020, 0x26f0ffff, 0xcf01c081, 0x1af000fc, 0x1334000a, 0x24e02000, 0x7f63400a, - 0x18e00074, 0x32240003, 0x9a400006, 0x32240000, 0x9a400004, 0xcd81c082, 0xcdc1c083, 0x80000f9d, - 0x51e40020, 0x7e5a401a, 0xd2400072, 0xc8280072, 0xce81c082, 0x56ac0020, 0x26f0ffff, 0xcf01c083, - 0x1af000fc, 0x13380016, 0x18e00039, 0x12200019, 0x7fa3800a, 0x7fb7800a, 0x18e0007d, 0x1220001d, - 0x7fa3800a, 0x18e00074, 0x12200014, 0x7fa3800a, 0xcf81c078, 0xcfc1c084, 0x80000c16, 0x7c40c001, - 0x18dc003d, 0x95c00004, 0x041c0000, 0x042c01c8, 0x8c000d61, 0x18d001e8, 0x31140005, 0x99400003, - 0x31140006, 0x95400002, 0x8c00104f, 0x05280fb7, 0x28140002, 0xcd400013, 0x86800000, 0x80000fbe, - 0x80000fbe, 0x80000fc2, 0x80000fbe, 0x80000fd1, 0x80000ff2, 0x80000ff2, 0x24cc003f, 0xccc1a2a4, - 0x7c408001, 0x88000000, 0x7c414001, 0x18e80039, 0x52a8003b, 0x50580020, 0x24cc003f, 0x7d59401a, - 0xd1400072, 0xc8140072, 0x7d69401a, 0xc41c0017, 0x99c0ffff, 0xd140004b, 0xccc1a2a4, 0x7c408001, - 0x88000000, 0xc414000d, 0x04180001, 0x24cc003f, 0x7d958004, 0xcd800035, 0xccc1a2a4, 0xc43c000e, - 0x2bfc0008, 0xcfc00013, 0xc43d3249, 0x1bfc003e, 0x97c00002, 0xd8400074, 0xc4100019, 0x7d150005, - 0x25100001, 0x9500000b, 0x97c0fffc, 0xc4180021, 0x159c0011, 0x259800ff, 0x31a00003, 0x31a40001, - 0x7e25800a, 0x95c0fff5, 0x9580fff4, 0x80000fef, 0xc411326f, 0x1d100010, 0xcd01326f, 0x97c00002, - 0xd8000074, 0x80001b70, 0x04380000, 0xc430000d, 0xc8140023, 0xc4180081, 0x13300005, 0xc011000f, - 0xc4240004, 0x33b40003, 0x97400003, 0xc0340008, 0x80000ffe, 0xc4340035, 0x11a00002, 0x7c908009, - 0x12640004, 0x7d614011, 0xc4100026, 0x05980008, 0x7ca4800a, 0x7d1a0002, 0x7cb0800a, 0x282c2002, - 0x208801a8, 0x3e280008, 0x7cb4800a, 0xcec00013, 0xc4300027, 0x042c0008, 0xd1400025, 0xcf000024, - 0x20240030, 
0x7ca48001, 0xcc800026, 0xccc00026, 0x9b800013, 0xcc400026, 0x7c414001, 0x28340000, - 0xcf400013, 0x507c0020, 0x7d7d401a, 0xd1400072, 0xc8140072, 0x557c0020, 0x28342002, 0xcf400013, - 0xcd400026, 0xcfc00026, 0xd4400026, 0x9a80000e, 0x32280000, 0x9a80000b, 0x8000102f, 0xcc000026, - 0xcc000026, 0xcc000026, 0xcc000026, 0xcc000026, 0x9a800005, 0x32280000, 0x9a800002, 0x9a000000, - 0x7c018001, 0xcc000026, 0xd8400027, 0x1cccfe08, 0xd8800013, 0xcec0003a, 0xccc1a2a4, 0xc43c000e, - 0x2bfc0008, 0xcfc00013, 0xc43d3249, 0x1bfc003e, 0x9bc00007, 0xc428000e, 0x16a80008, 0xce800009, - 0xc42c005e, 0x96c00b33, 0xd840003c, 0xc4200025, 0x7da2400f, 0x7da28002, 0x7e1ac002, 0x0aec0001, - 0x96400002, 0x7d2ac002, 0x3ef40010, 0x9b40f11d, 0x04380030, 0xcf81325e, 0x80000c16, 0xde410000, - 0xdcc10000, 0xdd010000, 0xdd410000, 0xdd810000, 0xddc10000, 0xde010000, 0xc40c000e, 0x7c024001, - 0x28cc0008, 0xccc00013, 0xc8100086, 0x5510003f, 0xc40d3249, 0x18cc003e, 0x98c00003, 0x99000011, - 0x80001075, 0x9900000c, 0xc40c0026, 0xc4100081, 0xc4140025, 0x7d15800f, 0x7d15c002, 0x7d520002, - 0x0a200001, 0x95800002, 0x7cde0002, 0x3e20001a, 0x9a000009, 0x040c0030, 0xccc1325e, 0x80001071, - 0xd9c00036, 0xd8400029, 0xc40c005e, 0x94c00b01, 0x04240001, 0xdc200000, 0xdc1c0000, 0xdc180000, - 0xdc140000, 0xdc100000, 0xdc0c0000, 0x96400004, 0xdc240000, 0xdc0c0000, 0x80000c16, 0xdc240000, - 0x90000000, 0xcc40003f, 0xd8c00010, 0xc4080029, 0xcc80003b, 0xc418000e, 0x18a800e5, 0x1d980008, - 0x12a80008, 0x7da9800a, 0x29980008, 0xcd800013, 0x18a400e5, 0x12500009, 0x248c0008, 0x94c00006, - 0x200c006d, 0x7cd0c00a, 0xccc1326c, 0xc421326c, 0x96000001, 0xcd800013, 0x200c0228, 0x7cd0c00a, - 0xccc1326c, 0xc421326c, 0x96000001, 0xc40c002a, 0xc410002b, 0x18881fe8, 0x18d4072c, 0x18cc00d1, - 0x7cd4c00a, 0x3094000d, 0x38d80000, 0x311c0003, 0x99400006, 0x30940007, 0x1620001f, 0x9940001d, - 0x9a000023, 0x800010c4, 0x9580001a, 0x99c00019, 0xccc00041, 0x25140001, 0xc418002c, 0x9940000d, - 0x259c007f, 0x95c00013, 0x19a00030, 0xcdc0001b, 0xd8400021, 0xd8400022, 0xc430000f, 0x17300001, - 0x9b00fffe, 0x9a000012, 0xd8400023, 0x800010cb, 0x199c0fe8, 0xcdc0001b, 0xd8400021, 0xd8400023, - 0xc430000f, 0x17300001, 0x9b00fffe, 0x800010cb, 0xd8c00010, 0xd8000022, 0xd8000023, 0xc430005e, - 0x97000aac, 0x7c408001, 0x88000000, 0xc43c000e, 0xc434002e, 0x2bfc0008, 0x2020002c, 0xcfc00013, - 0xce01326c, 0x17780001, 0x27740001, 0x07a810d8, 0xcf400010, 0xc421326c, 0x96000001, 0x86800000, - 0x80000168, 0x80000aa7, 0x80000bfc, 0x800012e9, 0x8000104c, 0xcc400040, 0xd8800010, 0xc4180032, - 0x29980008, 0xcd800013, 0x200c007d, 0xccc1325b, 0xc411325b, 0x95000001, 0x7c408001, 0x88000000, - 0x28240007, 0xde430000, 0xd4400078, 0x80001190, 0xcc80003b, 0x24b00008, 0xc418000e, 0x1330000a, - 0x18a800e5, 0x1d980008, 0x12a80008, 0x7da9800a, 0x29980008, 0xcd800013, 0xc40d3249, 0x18cc003e, - 0x98c00002, 0xd840003d, 0x2b304000, 0xcf01326c, 0xc431326c, 0x7c40c001, 0x7c410001, 0x7c414001, - 0x192400fd, 0x50580020, 0x7d59401a, 0x7c41c001, 0x06681110, 0x7c420001, 0xcc400078, 0x18ac0024, - 0x19180070, 0x19100078, 0xcec00008, 0x18f40058, 0x5978073a, 0x7f7b400a, 0x97000001, 0x86800000, - 0x80001117, 0x80001118, 0x80001122, 0x8000112d, 0x80001130, 0x80001133, 0x8000016a, 0x8000117b, - 0x24ec0f00, 0x32ec0600, 0x96c00003, 0xc4300006, 0x9b00ffff, 0xd1400025, 0xcf400024, 0xcdc00026, - 0xd8400027, 0x8000117b, 0x24ec0f00, 0x32ec0600, 0x96c00003, 0xc4300006, 0x9b00ffff, 0xd1400025, - 0xcf400024, 0xcdc00026, 0xce000026, 0xd8400027, 0x8000117b, 0xc81c001f, 0x55e00020, 0x80001122, - 0xc81c0020, 0x55e00020, 0x80001122, 
0x8c00116b, 0xd8400013, 0xc02a0200, 0x7e8e8009, 0x22a8003d, - 0x22a80074, 0x2774001c, 0x13740014, 0x7eb6800a, 0x25ecffff, 0x55700020, 0x15f40010, 0x13740002, - 0x275c001f, 0x95c00027, 0x7c018001, 0x7f41c001, 0x15dc0002, 0x39e00008, 0x25dc0007, 0x7dc1c01e, - 0x05dc0001, 0x96000004, 0x05e40008, 0x8c00116e, 0x80001168, 0x7dc2001e, 0x06200001, 0x05e40008, - 0x7e62000e, 0x9a000004, 0x7da58001, 0x8c00116e, 0x80001165, 0x7dc2001e, 0x06200001, 0x7e1a0001, - 0x05cc0008, 0x7e0d000e, 0x95000007, 0x7e02401e, 0x06640001, 0x06640008, 0x05d80008, 0x8c00116e, - 0x80001168, 0x7dc2401e, 0x06640001, 0x7da58001, 0x8c00116e, 0x05e00008, 0x7da2000c, 0x9600ffe6, - 0x17640002, 0x8c00116e, 0x80001190, 0xc4200006, 0x9a00ffff, 0x90000000, 0x8c00116b, 0xc420000e, - 0x2a200001, 0xce00001a, 0xce81c078, 0xcec1c080, 0xcc01c081, 0xcd41c082, 0xcf01c083, 0x12640002, - 0x22640435, 0xce41c084, 0x90000000, 0x0528117e, 0x312c0003, 0x86800000, 0x80001190, 0x80001185, - 0x80001182, 0x80001182, 0xc4300012, 0x9b00ffff, 0x9ac0000c, 0xc03a0400, 0xc4340004, 0xd8400013, - 0xd8400008, 0xc418000e, 0x15980008, 0x1198001c, 0x7d81c00a, 0xcdc130b7, 0xcf8130b5, 0xcf400008, - 0x04240008, 0xc418000e, 0xc41c0049, 0x19a000e8, 0x29a80008, 0x7de2c00c, 0xce800013, 0xc421325e, - 0x26200010, 0xc415326d, 0x9a000006, 0xc420007d, 0x96000004, 0x96c00003, 0xce40003e, 0x800011a3, - 0x7d654001, 0xcd41326d, 0x7c020001, 0x96000005, 0xc4100026, 0xc4240081, 0xc4140025, 0x800011b6, - 0xc4253279, 0xc415326d, 0xc431326c, 0x2730003f, 0x3b380006, 0x97800004, 0x3f38000b, 0x9b800004, - 0x800011b4, 0x04300006, 0x800011b4, 0x0430000b, 0x04380002, 0x7fb10004, 0x7e57000f, 0x7e578002, - 0x7d67c002, 0x0be40001, 0x97000002, 0x7d3a4002, 0x202c002c, 0xc421325e, 0x04280020, 0xcec1326c, - 0x26200010, 0x3e640010, 0x96000003, 0x96400002, 0xce81325e, 0xc4300028, 0xc434002e, 0x17780001, - 0x27740001, 0x07a811cf, 0x9b00feb8, 0xcf400010, 0xc414005e, 0x954009a7, 0x86800000, 0x80000168, - 0x80000aa7, 0x80000bfc, 0x800012e9, 0x80000168, 0x8c00120d, 0x7c40c001, 0xccc1c07c, 0xcc41c07d, - 0xcc41c08c, 0x7c410001, 0xcc41c079, 0xcd01c07e, 0x7c414001, 0x18f0012f, 0x18f40612, 0x18cc00c1, - 0x7f73400a, 0x7cf7400a, 0x39600004, 0x9a000002, 0xc0140004, 0x11600001, 0x18fc003e, 0x9740001c, - 0xcf400041, 0xc425c07f, 0x97c00003, 0x166c001f, 0x800011ee, 0x1a6c003e, 0x96c00006, 0x04200002, - 0x0a200001, 0x9a00ffff, 0xd8400013, 0x800011e8, 0xc428002c, 0x96800010, 0x26ac007f, 0xcec0001b, - 0xd8400021, 0x1ab00030, 0x1aac0fe8, 0xc434000f, 0x9b40ffff, 0x97000008, 0xcec0001b, 0xd8400021, - 0xc434000f, 0x9b40ffff, 0x80001205, 0x0a200001, 0x9a00ffff, 0xd8400013, 0xc425c07f, 0x166c001f, - 0x11600001, 0x9ac0fffa, 0x8c001232, 0x7c408001, 0x88000000, 0xd8000033, 0xc438000b, 0xc43c0009, - 0x27fc0001, 0x97c0fffe, 0xd8400013, 0xd841c07f, 0xc43dc07f, 0x1bfc0078, 0x7ffbc00c, 0x97c0fffd, - 0x90000000, 0xc03a2800, 0xcf81c07c, 0xcc01c07d, 0xcc01c08c, 0xcc01c079, 0xcc01c07e, 0x04380040, - 0xcf80001b, 0xd8400021, 0xc438000f, 0x9b80ffff, 0x04380060, 0xcf80001b, 0xd8400021, 0xc438000f, - 0x9b80ffff, 0x04380002, 0x0bb80001, 0x9b80ffff, 0xd8400013, 0xc43dc07f, 0x17fc001f, 0x04380010, - 0x9bc0fffa, 0x90000000, 0xd8400013, 0xd801c07f, 0xd8400013, 0xc43dc07f, 0xcfc00078, 0xd8000034, - 0x90000000, 0xc03ae000, 0xcf81c200, 0xc03a0800, 0xcf81c07c, 0xcc01c07d, 0xcc01c08c, 0xcc01c079, - 0xcc01c07e, 0x04380040, 0xcf80001b, 0xd8400021, 0xc438000f, 0x9b80ffff, 0x04380002, 0x0bb80001, - 0x9b80ffff, 0xd8400013, 0xc43dc07f, 0x17fc001f, 0x04380010, 0x9bc0fffa, 0x90000000, 0xc03ae000, - 0xcf81c200, 0xc03a4000, 0xcf81c07c, 0xcc01c07d, 0xcc01c08c, 
0xcc01c079, 0xcc01c07e, 0x04380002, - 0x0bb80001, 0x9b80ffff, 0xd8400013, 0xc43dc07f, 0x17fc001f, 0x04380010, 0x9bc0fffa, 0x90000000, - 0xc40c0007, 0x30d00002, 0x99000052, 0xd8400029, 0xc424005e, 0x9640090f, 0x7c410001, 0xc428000e, - 0x1514001f, 0x19180038, 0x2aa80008, 0x99400030, 0x30dc0001, 0xce800013, 0x99c0000a, 0xc42d324e, - 0xc431324d, 0x52ec0020, 0x7ef2c01a, 0xc435324f, 0xc4293256, 0x1ab0c006, 0x52ec0008, 0x8000127f, - 0xc42d3258, 0xc4313257, 0x52ec0020, 0x7ef2c01a, 0xc4353259, 0xc429325a, 0x1ab0c012, 0x07740001, - 0x04240002, 0x26a0003f, 0x7e624004, 0x7f67800f, 0x97800002, 0x04340000, 0x53740002, 0x7ef6c011, - 0x1ab42010, 0x16a8000c, 0x26a80800, 0x2b740000, 0x7f73400a, 0x7f6b400a, 0xcf40001c, 0xd2c0001e, - 0xd8400021, 0xc438000f, 0x9b80ffff, 0xc4100011, 0x1514001f, 0x99400006, 0x9980000a, 0x8c0012e1, - 0xc40c0007, 0x04100000, 0x80001267, 0xd800002a, 0xc424005e, 0x964008d7, 0xd9800036, 0x80000c16, - 0xc42c001d, 0x95c00005, 0xc431325a, 0x1b300677, 0x11dc000c, 0x800012aa, 0xc4313256, 0x1b34060b, - 0x1b300077, 0x7f37000a, 0x13300017, 0x04340100, 0x26ec00ff, 0xc03a8002, 0x7ef6c00a, 0x7edec00a, - 0x7f3b000a, 0x7ef2c00a, 0xcec1325b, 0x80000c16, 0xc4140032, 0xc410001d, 0x29540008, 0xcd400013, - 0xc40d325b, 0x1858003f, 0x251000ff, 0x99800007, 0x7d0cc00a, 0xccc1325b, 0xc411325d, 0x251001ef, - 0xcd01325d, 0x80000168, 0x18d0006c, 0x18d407f0, 0x9900000e, 0x04100002, 0xc4193256, 0xc41d324f, - 0x2598003f, 0x7d190004, 0x7d5d4001, 0x7d52000f, 0x9a000003, 0xcd41324f, 0x800012d8, 0x7d514002, - 0xcd41324f, 0x800012d8, 0xc4193259, 0xc41d325a, 0x7d958001, 0x7dd5c002, 0xcd813259, 0xcdc1325a, - 0xc411325d, 0x251001ef, 0xcd01325d, 0x1ccc001e, 0xccc1325b, 0xc40d325b, 0x94c00001, 0x7c408001, - 0x88000000, 0xc40c0021, 0xc4340028, 0x14f00010, 0xc4380030, 0xc43c0007, 0x9b000004, 0x9b40000c, - 0x9b80000f, 0x90000000, 0x17300001, 0x9b000005, 0xccc00037, 0x8c000190, 0xd8000032, 0x90000000, - 0xd8000028, 0xd800002b, 0x80000168, 0xd980003f, 0x97c00002, 0xd9c0003f, 0x80001082, 0xd9800040, - 0x97c00002, 0xd9c00040, 0x800010de, 0xc43c0007, 0x33f80003, 0x97800051, 0xcc80003b, 0x24b00008, - 0xc418000e, 0x1330000a, 0x18a800e5, 0x1d980008, 0x12a80008, 0x7da9800a, 0x29980008, 0xcd800013, - 0xc4353249, 0x1b74003e, 0x9b400002, 0xd840003d, 0x2b304000, 0xcf01326c, 0xc431326c, 0x97000001, - 0x7c434001, 0x1b4c00f8, 0x7c410001, 0x7c414001, 0x50700020, 0x04e81324, 0x18ac0024, 0x7c41c001, - 0x50600020, 0xcc400078, 0x30e40004, 0x9a400007, 0x7d71401a, 0x596401fc, 0x12640009, 0x1b74008d, - 0x7e76400a, 0x2a640000, 0xcec00008, 0x86800000, 0x8000016a, 0x8000016a, 0x8000016a, 0x8000016a, - 0x8000132c, 0x8000133b, 0x80001344, 0x8000016a, 0xc4340004, 0xd8400013, 0xd8400008, 0xc42530b5, - 0x1a68003a, 0x9a80fffe, 0x2024003a, 0xc418000e, 0x25980700, 0x11980014, 0x7d19000a, 0xcd0130b7, - 0xce4130b5, 0xcf400008, 0x80001190, 0xce40001c, 0xd140001e, 0xd8400021, 0xc428000f, 0x9a80ffff, - 0xc4240011, 0x7de6800f, 0x9a80ffea, 0x80001190, 0xce40001c, 0xd140001e, 0xd8400021, 0xc428000f, - 0x9a80ffff, 0xc8240011, 0x7de1c01a, 0x7de6800f, 0x9a80ffe0, 0x80001190, 0x8c00104f, 0x28182002, - 0xc430000d, 0xc4340035, 0xcd800013, 0xc8140023, 0xc4180081, 0x13300005, 0xc4240004, 0x11a00002, - 0x12640004, 0x7d614011, 0xc4100026, 0x05980008, 0x7ca4800a, 0x7d1a0002, 0x7cb0800a, 0x3e280008, - 0x7cb4800a, 0xc4300027, 0x042c0008, 0xd1400025, 0xcf000024, 0x20240030, 0x7ca48001, 0xcc800026, - 0x7c434001, 0x1b4c00f8, 0xcf400026, 0xcc400026, 0x28340000, 0xcf400013, 0x7c414001, 0x507c0020, - 0x30e40004, 0x9a400005, 0x7d7d401a, 0xd1400072, 0xc8140072, 0x557c0020, 0x28342002, 
0xcf400013, - 0xcd400026, 0xcfc00026, 0xd4400026, 0xcc000026, 0x9a800005, 0x32280000, 0x9a800002, 0x9a000000, - 0x7c018001, 0xd8400027, 0xd8800013, 0x04380028, 0xcec0003a, 0xcf81a2a4, 0x80001037, 0xd8400029, - 0xc40c005e, 0x94c007eb, 0x7c40c001, 0x50500020, 0x7d0d001a, 0xd1000072, 0xc8100072, 0x591c01fc, - 0x11dc0009, 0x45140210, 0x595801fc, 0x11980009, 0x29dc0000, 0xcdc0001c, 0xd140001e, 0xd8400021, - 0xc418000f, 0x9980ffff, 0xc4200011, 0x1624001f, 0x96400069, 0xc40c000e, 0x28cc0008, 0xccc00013, - 0xce013249, 0x1a307fe8, 0xcf00000a, 0x23304076, 0xd1000001, 0xcf000001, 0xc41d3254, 0xc4253256, - 0x18cc00e8, 0x10cc0015, 0x4514020c, 0xd140001e, 0xd8400021, 0xc418000f, 0x9980ffff, 0xc4200011, - 0xce013248, 0x1a2001e8, 0x12200014, 0x2a204001, 0xce000013, 0x1a64003c, 0x1264001f, 0x11dc0009, - 0x15dc000b, 0x7dcdc00a, 0x7e5dc00a, 0xcdc00100, 0xd8800013, 0xd8400010, 0xd800002a, 0xd8400008, - 0xcf00000d, 0xcf00000a, 0x8c001427, 0x04340022, 0x07740001, 0x04300010, 0xdf430000, 0x7c434001, - 0x7c408001, 0xd4412e01, 0x0434001e, 0xdf430000, 0xd4400078, 0xdf030000, 0xd4412e40, 0xd8400013, - 0xcc41c030, 0xcc41c031, 0x248dfffe, 0xccc12e00, 0xd8800013, 0xcc812e00, 0x7c434001, 0x7c434001, - 0x8c00142b, 0xd8000010, 0xc40c000e, 0x28cc0008, 0xccc00013, 0x45140248, 0xd140001e, 0xd8400021, - 0xc418000f, 0x9980ffff, 0xc8200011, 0xce013257, 0x56200020, 0xce013258, 0x0434000c, 0xdb000024, - 0xd1400025, 0xd8000026, 0xd8000026, 0xd8400027, 0x45540008, 0xd140001e, 0xd8400021, 0xc418000f, - 0x9980ffff, 0xc8200011, 0xce013259, 0x56200020, 0xc0337fff, 0x7f220009, 0xce01325a, 0x55300020, - 0x7d01c001, 0x042c01d0, 0x8c000d61, 0x06ec0004, 0x7f01c001, 0x8c000d61, 0x041c0002, 0x042c01c8, - 0x8c000d61, 0xc4380012, 0x9b80ffff, 0xd800002a, 0x80000aa7, 0xd800002a, 0x7c408001, 0x88000000, - 0xd8400029, 0x7c40c001, 0x50500020, 0x8c001427, 0x7cd0c01a, 0xc4200007, 0xd0c00072, 0xc8240072, - 0xd240001e, 0x7c414001, 0x19682011, 0x5a6c01fc, 0x12ec0009, 0x7eeac00a, 0x2aec0000, 0xcec0001c, - 0xd8400021, 0xc430000f, 0x9b00ffff, 0xc4180011, 0x7c438001, 0x99800007, 0xdf830000, 0xcfa0000c, - 0x8c00142b, 0xd4400078, 0xd800002a, 0x80001b70, 0x8c00142b, 0xd800002a, 0x80001b70, 0xd8000012, - 0xc43c0008, 0x9bc0ffff, 0x90000000, 0xd8400012, 0xc43c0008, 0x97c0ffff, 0x90000000, 0xc4380007, - 0x7c40c001, 0x17b80001, 0x18d40038, 0x7c410001, 0x9b800004, 0xd8400029, 0xc414005e, 0x9540073d, - 0x18c80066, 0x7c414001, 0x30880001, 0x7c418001, 0x94800008, 0x8c00187c, 0xcf400013, 0xc42c0004, - 0xd8400008, 0xcd910000, 0xcec00008, 0x7d410001, 0x043c0000, 0x7c41c001, 0x7c420001, 0x04240001, - 0x06200001, 0x4220000c, 0x0a640001, 0xcc000078, 0x9a40fffe, 0x24e80007, 0x24ec0010, 0xd8400013, - 0x9ac00006, 0xc42c0004, 0xd8400008, 0xc5310000, 0xcec00008, 0x80001465, 0x51540020, 0x7d15001a, - 0xd1000072, 0xc82c0072, 0xd2c0001e, 0x18f02011, 0x5aec01fc, 0x12ec0009, 0x7ef2c00a, 0x2aec0000, - 0xcec0001c, 0xd8400021, 0xc42c000f, 0x9ac0ffff, 0xc4300011, 0x96800012, 0x12a80001, 0x0aa80001, - 0x06a8146a, 0x7f1f0009, 0x86800000, 0x7f1b400f, 0x80001478, 0x7f1b400e, 0x80001478, 0x7f1b400c, - 0x8000147a, 0x7f1b400d, 0x8000147a, 0x7f1b400f, 0x8000147a, 0x7f1b400e, 0x8000147a, 0x7f334002, - 0x97400014, 0x8000147b, 0x9b400012, 0x9b800005, 0x9bc0001f, 0x7e024001, 0x043c0001, 0x8000144a, - 0xc40c0032, 0xc438001d, 0x28cc0008, 0xccc00013, 0xc43d325b, 0x1bb81ff0, 0x7fbfc00a, 0xcfc1325b, - 0xc411325d, 0x251001ef, 0xcd01325d, 0x80001b70, 0x94800007, 0x8c00187c, 0xcf400013, 0xc42c0004, - 0xd8400008, 0xcd910000, 0xcec00008, 0x9b800003, 0xd800002a, 0x80001b70, 0xc40c0032, 0x28cc0008, - 0xccc00013, 
0xc40d325b, 0x800012c2, 0xc40c000e, 0xc43c0007, 0xc438001d, 0x28cc0008, 0xccc00013, - 0x13f4000c, 0x9bc00006, 0xc43d3256, 0x1bf0060b, 0x1bfc0077, 0x7ff3c00a, 0x800014a9, 0xc43d325a, - 0x1bfc0677, 0x04300100, 0x1bb81ff0, 0x7f73400a, 0xc0328007, 0x7fb7800a, 0x13fc0017, 0x7ff3c00a, - 0x7ffbc00a, 0xcfc1325b, 0xc03a0002, 0xc4340004, 0xd8400013, 0xd8400008, 0xcf8130b5, 0xcf400008, - 0x80000c16, 0x043c0000, 0xc414000e, 0x29540008, 0xcd400013, 0xc4193246, 0xc41d3245, 0x51980020, - 0x7dd9c01a, 0x45dc0390, 0xc4313267, 0x04183000, 0xcd813267, 0x1b380057, 0x1b340213, 0x1b300199, - 0x7f7b400a, 0x7f73400a, 0xcf400024, 0xd1c00025, 0xcc800026, 0x7c420001, 0xce000026, 0x7c424001, - 0xce400026, 0x7c428001, 0xce800026, 0x7c42c001, 0xcec00026, 0x7c430001, 0xcf000026, 0x7c434001, - 0xcf400026, 0x7c438001, 0xcf800026, 0xd8400027, 0xcd400013, 0x04182000, 0xcd813267, 0xd840004f, - 0x1a0800fd, 0x109c000a, 0xc4193265, 0x7dd9c00a, 0xcdc13265, 0x2620ffff, 0xce080228, 0x9880000e, - 0xce480250, 0xce880258, 0xd8080230, 0xd8080238, 0xd8080240, 0xd8080248, 0xd8080268, 0xd8080270, - 0xd8080278, 0xd8080280, 0xd800004f, 0x97c0ec75, 0x90000000, 0x040c0000, 0x041c0010, 0x26180001, - 0x09dc0001, 0x16200001, 0x95800002, 0x04cc0001, 0x99c0fffb, 0xccc80230, 0xd8080238, 0xd8080240, - 0xd8080248, 0x040c0000, 0xce480250, 0xce880258, 0x52a80020, 0x7e6a401a, 0x041c0020, 0x66580001, - 0x09dc0001, 0x56640001, 0x95800002, 0x04cc0001, 0x99c0fffb, 0xccc80260, 0xd8080268, 0xd8080270, - 0xd8080278, 0xd8080280, 0x040c0000, 0xcec80288, 0xcf080290, 0xcec80298, 0xcf0802a0, 0x040c0000, - 0x041c0010, 0xcf4802a8, 0x27580001, 0x09dc0001, 0x17740001, 0x95800002, 0x04cc0001, 0x99c0fffb, - 0xccc802b0, 0xd80802b8, 0x178c000b, 0x27b8003f, 0x7cf8c001, 0xcf8802c0, 0xccc802c8, 0xcf8802d0, - 0xcf8802d8, 0xd800004f, 0x97c00002, 0x90000000, 0x7c408001, 0x88000000, 0xc40c000e, 0x28cc0008, - 0xccc00013, 0xc43d3265, 0x1bc800ea, 0x7c418001, 0x25b8ffff, 0xc4930240, 0xc48f0238, 0x04cc0001, - 0x24cc000f, 0x7cd2800c, 0x9a80000b, 0xc5230309, 0x2620ffff, 0x7e3a400c, 0x9a400004, 0x05100001, - 0x2510000f, 0x80001539, 0xcd08034b, 0xd4400078, 0x80000168, 0xc48f0230, 0xc4930240, 0x98c00004, - 0xcd880353, 0x8c00163f, 0xc49b0353, 0xc4930238, 0xc48f0228, 0x05100001, 0x2510000f, 0x7cd14005, - 0x25540001, 0x99400004, 0x05100001, 0x2510000f, 0x8000154f, 0xc48f0230, 0x7c41c001, 0xcd080238, - 0xcd08034b, 0x08cc0001, 0x2598ffff, 0x3d200008, 0xccc80230, 0xcd900309, 0xd8100319, 0x04340801, - 0x2198003f, 0xcf400013, 0xcd910ce7, 0xc4190ce6, 0x7d918005, 0x25980001, 0x9580fffd, 0x7d918004, - 0xcd810ce6, 0x9a000003, 0xcdd1054f, 0x8000156e, 0x090c0008, 0xcdcd050e, 0x040c0000, 0x110c0014, - 0x28cc4001, 0xccc00013, 0xcc41230a, 0xcc41230b, 0xcc41230c, 0xcc41230d, 0xcc480329, 0xcc48032a, - 0xcc4802e0, 0xd8000055, 0xc48f02e0, 0x24d8003f, 0x09940001, 0x44100001, 0x9580002c, 0x95400005, - 0x09540001, 0x51100001, 0x69100001, 0x8000157f, 0x24cc003f, 0xc4970290, 0xc49b0288, 0x51540020, - 0x7d59401a, 0xc49b02a0, 0xc49f0298, 0x51980020, 0x7d9d801a, 0x041c0040, 0x04200000, 0x7dcdc002, - 0x7d924019, 0x7d26400c, 0x09dc0001, 0x9a400008, 0x51100001, 0x06200001, 0x99c0fffa, 0xc48f0230, - 0xc4930240, 0x8c00163f, 0x80001579, 0x7d010021, 0x7d914019, 0xc4930238, 0x55580020, 0xcd480298, - 0xcd8802a0, 0x10d40010, 0x12180016, 0xc51f0309, 0x7d95800a, 0x7d62000a, 0x7dd9c00a, 0xd8400013, - 0xcdd00309, 0xce113320, 0xc48f02e0, 0xc49b02b0, 0x18dc01e8, 0x7dd9400e, 0xc48f0230, 0xc4930240, - 0x95c0001d, 0x95400003, 0x8c00163f, 0x800015aa, 0xc48f0238, 0xc4a302b8, 0x12240004, 0x7e5e400a, - 0xc4ab02a8, 0x04100000, 0xce4c0319, 
0x7d9d8002, 0x7ea14005, 0x25540001, 0x99400004, 0x06200001, - 0x2620000f, 0x800015bc, 0x09dc0001, 0x04240001, 0x7e624004, 0x06200001, 0x7d25000a, 0x2620000f, - 0x99c0fff4, 0xd8400013, 0xcd0d3330, 0xce0802b8, 0xcd8802b0, 0xc4ab02e0, 0x1aa807f0, 0xc48f02d0, - 0xc49702d8, 0xc49b02c8, 0xc49f02c0, 0x96800028, 0x7d4e000f, 0x9600000b, 0x7d964002, 0x7e6a000f, - 0x96000003, 0x7d694001, 0x800015e9, 0x7cde4002, 0x7e6a000f, 0x96000008, 0x7de94001, 0x800015e9, - 0x7cd64002, 0x7e6a000e, 0x96000003, 0x7d694001, 0x800015e9, 0xc48f0230, 0xc4930240, 0x8c00163f, - 0x800015cd, 0xc4930238, 0x7d698002, 0xcd4802d8, 0x129c0008, 0xc50f0319, 0x11a0000e, 0x11140001, - 0xc4340004, 0xd8400008, 0xd8400013, 0x7e1e000a, 0x1198000a, 0xcd953300, 0x7e0e000a, 0x12a8000a, - 0xce953301, 0xce100319, 0xcf400008, 0xc4b70280, 0xc4b30278, 0x7f73800a, 0x536c0020, 0x7ef2c01a, - 0x9780eb68, 0x8c001608, 0xd8080278, 0xd8080280, 0x7c408001, 0x88000000, 0x043c0003, 0x80001609, - 0x043c0001, 0x30b40000, 0x9b400011, 0xc4b70258, 0xc4b30250, 0x53780020, 0x7fb3801a, 0x7faf8019, - 0x04300020, 0x04280000, 0x67b40001, 0x0b300001, 0x57b80001, 0x97400002, 0x06a80001, 0x9b00fffb, - 0xc4bb0260, 0x7fab8001, 0xcf880260, 0x04300020, 0x04280000, 0x66f40001, 0x0b300001, 0x56ec0001, - 0x97400005, 0x8c001628, 0xc4353247, 0x7f7f4009, 0x9b40fffe, 0x06a80001, 0x9b00fff7, 0x90000000, - 0x269c0007, 0x11dc0008, 0x29dc0008, 0x26a00018, 0x12200003, 0x7de1c00a, 0x26a00060, 0x06200020, - 0x16200001, 0x7de1c00a, 0xcdc00013, 0x90000000, 0x269c0018, 0x26a00007, 0x26a40060, 0x11dc0006, - 0x12200006, 0x16640001, 0x29dc0008, 0x7de1c00a, 0x7de5c00a, 0xcdc00013, 0x90000000, 0xc4b70228, - 0x05100001, 0x04cc0001, 0x2510000f, 0xccc80230, 0x7f514005, 0x25540001, 0x99400004, 0x05100001, - 0x2510000f, 0x80001644, 0xc4b30248, 0xcd080240, 0x7f130005, 0x27300001, 0x9b000002, 0x8c001688, - 0x8c00120d, 0x8c001219, 0x8c001232, 0x04300001, 0x04340801, 0x7f130004, 0xcf400013, 0xcf01051e, - 0xc42d051f, 0x7ed2c005, 0x26ec0001, 0x96c0fffd, 0xcf01051f, 0xd8000055, 0xc5170309, 0x195c07f0, - 0x196007f6, 0x04340000, 0x95c00008, 0x09dc0001, 0x04340001, 0x95c00005, 0x09dc0001, 0x53740001, - 0x6b740001, 0x80001665, 0xc4a702a0, 0xc4ab0298, 0x52640020, 0x7e6a401a, 0x7f634014, 0x7e76401a, - 0xc4300004, 0xd8400008, 0xd8400013, 0x56680020, 0xd8113320, 0xce480298, 0xce8802a0, 0xc5170319, - 0xc4b702b0, 0x255c000f, 0x7f5f4001, 0xd8113330, 0xcf4802b0, 0x11340001, 0x195c07e8, 0x196007ee, - 0xd8353300, 0x7e1e4001, 0xd8353301, 0xce4802d0, 0xd8100309, 0xd8100319, 0xcf000008, 0x90000000, - 0xc4970258, 0xc48f0250, 0x51540020, 0x7cd4c01a, 0xc4af0280, 0xc4b30278, 0x52ec0020, 0x7ef2c01a, - 0x04140020, 0x04280000, 0x64d80001, 0x09540001, 0x54cc0001, 0x95800060, 0x8c001628, 0xc4193247, - 0x25980001, 0x9580005c, 0x7dc24001, 0xc41d3248, 0x25dc000f, 0x7dd2000c, 0x96000057, 0xc41d3255, - 0xc435324f, 0x7df5c00c, 0x99c00004, 0xc4193265, 0x25980040, 0x9580fffe, 0xc439325b, 0x1bb0003f, - 0x97000049, 0x1bb000e8, 0x33380003, 0x9b800046, 0x33300002, 0x9700000a, 0xc4393260, 0x1bb000e4, - 0x33300004, 0x97000040, 0xc431325d, 0x27300010, 0x9b00fffe, 0x800016f1, 0xce400013, 0xc033ffff, - 0x2f3000ff, 0xc439325b, 0x7f3b0009, 0xcf01325b, 0xc439325b, 0x27b800ff, 0x9b80fffe, 0xd8c00033, - 0xc4300009, 0x27300008, 0x9700fffe, 0x1a7003e6, 0x27380003, 0x13b80004, 0x27300003, 0x13300003, - 0x7fb38001, 0x1a7000e8, 0x7fb38001, 0x13300001, 0x7fb38001, 0x07b80002, 0xd8400013, 0x1a700064, - 0x33300002, 0x97000009, 0x17b00005, 0x07300003, 0xcf012082, 0xcc01203f, 0xd8400013, 0xcc01203f, - 0x0b300003, 0x800016df, 0x17b00005, 0xcf012082, 0xcc01203f, 
0xd8400013, 0xcc01203f, 0x13300005, - 0x7fb30002, 0xc4392083, 0x7fb38005, 0x27b80001, 0x9b80ffdf, 0xd8c00034, 0xce400013, 0xc431325d, - 0x27300010, 0x9b00fffe, 0xc439325b, 0x27b000ff, 0x9b00ffca, 0xd841325d, 0x2030007b, 0xcf01325b, - 0x800016f2, 0xd841325d, 0x04300001, 0x7f2b0014, 0x7ef2c01a, 0x06a80001, 0x9940ff9c, 0x8c001608, - 0xd8080278, 0xd8080280, 0x90000000, 0xd840004f, 0xc414000e, 0x29540008, 0xcd400013, 0xc43d3265, - 0x1bc800ea, 0xd80802e9, 0x7c40c001, 0x18fc0064, 0x9bc00042, 0xc4193246, 0xc41d3245, 0x51980020, - 0x7dd9801a, 0x45980400, 0xc4313267, 0x043c3000, 0xcfc13267, 0xc43d3267, 0x9bc00001, 0x1b380057, - 0x1b340213, 0x1b300199, 0x7f7b400a, 0x7f73400a, 0xcf400024, 0x14f4001d, 0xc4bf02e9, 0x9bc0001c, - 0x7c410001, 0x192807fa, 0xc4bf0258, 0xc4a70250, 0x53fc0020, 0x7e7e401a, 0x042c0000, 0x04300000, - 0x667c0001, 0x56640001, 0x06ec0001, 0x97c0fffd, 0x07300001, 0x0aec0001, 0x7eebc00c, 0x06ec0001, - 0x97c0fff8, 0x0b300001, 0x43300007, 0x53300002, 0x7db30011, 0xd3000025, 0xc03ec005, 0x2bfca200, - 0xcfc00026, 0xccc00026, 0xcd000026, 0x192807fa, 0xc01f007f, 0x7d1d0009, 0x2110007d, 0x8c001628, - 0x203c003f, 0xcfc13256, 0x8c0017f5, 0xcd013254, 0x18fc01e8, 0xcfc13248, 0x8c00185b, 0xd8413247, - 0x0b740001, 0x9b40ffd5, 0xd800004f, 0xc4bf02e9, 0x97c0ea24, 0x90000000, 0x14d4001d, 0xc4930260, - 0x7d52400e, 0xc49f0258, 0xc4a30250, 0x51dc0020, 0x7de1801a, 0x96400017, 0x7d534002, 0xc4af0270, - 0x7dae4005, 0x26640001, 0x32e0001f, 0x9a400006, 0x06ec0001, 0x96000002, 0x042c0000, 0xcec80270, - 0x8000174f, 0x0b740001, 0x8c00178a, 0x05100001, 0x9b40fff3, 0xc4af0280, 0xc4b30278, 0x52ec0020, - 0x7ef2c01a, 0x8c001608, 0xd8080278, 0xd8080280, 0xc4ab0268, 0x7daa4005, 0x26640001, 0x32a0001f, - 0x9a400005, 0x06a80001, 0x96000002, 0x24280000, 0x80001765, 0x7c410001, 0xc01f007f, 0x09540001, - 0x7d1d0009, 0x2110007d, 0x8c001628, 0xd8013256, 0x8c0017f2, 0xcd013254, 0xc4113248, 0x15100004, - 0x11100004, 0xc4b3034b, 0x7f13000a, 0xcf013248, 0xc4930260, 0x8c001855, 0x32a4001f, 0xd8413247, - 0xd800004f, 0x09100001, 0x06a80001, 0x96400002, 0x24280000, 0xcd080260, 0xce880268, 0x9940ffc0, - 0x7c408001, 0x88000000, 0x7ec28001, 0x8c001628, 0x32e0001f, 0xc4253247, 0x26640001, 0x9640005e, - 0xc4293265, 0xc4253255, 0xc431324f, 0x7e72400c, 0x26a80040, 0x9a400002, 0x9680fff7, 0xc429325b, - 0x1aa4003f, 0x96400049, 0x1aa400e8, 0x32680003, 0x9a800046, 0x32640002, 0x9640000a, 0xc4293260, - 0x1aa400e4, 0x32640004, 0x96400040, 0xc425325d, 0x26640010, 0x9a40fffe, 0x800017e2, 0xcdc00013, - 0xc027ffff, 0x2e6400ff, 0xc429325b, 0x7e6a4009, 0xce41325b, 0xc429325b, 0x26a800ff, 0x9a80fffe, - 0xd8c00033, 0xc4240009, 0x26640008, 0x9640fffe, 0x19e403e6, 0x26680003, 0x12a80004, 0x26640003, - 0x12640003, 0x7ea68001, 0x19e400e8, 0x7ea68001, 0x12640001, 0x7ea68001, 0x06a80002, 0xd8400013, - 0x19e40064, 0x32640002, 0x96400009, 0x16a40005, 0x06640003, 0xce412082, 0xcc01203f, 0xd8400013, - 0xcc01203f, 0x0a640003, 0x800017d0, 0x16a40005, 0xce412082, 0xcc01203f, 0xd8400013, 0xcc01203f, - 0x12640005, 0x7ea64002, 0xc4292083, 0x7ea68005, 0x26a80001, 0x9a80ffdf, 0xd8c00034, 0xcdc00013, - 0xc425325d, 0x26640010, 0x9a40fffe, 0xc429325b, 0x26a400ff, 0x9a40ffca, 0xd841325d, 0x2024007b, - 0xce41325b, 0x800017e3, 0xd841325d, 0xc4a70280, 0xc4ab0278, 0x52640020, 0x7e6a401a, 0x04280001, - 0x7eae8014, 0x7e6a401a, 0x56680020, 0xce480278, 0xce880280, 0x06ec0001, 0x96000002, 0x042c0000, - 0xcec80270, 0x90000000, 0x7c438001, 0x7c420001, 0x800017fe, 0xc4bf02e9, 0x9bc00006, 0x7c438001, - 0x7c420001, 0xcf800026, 0xce000026, 0x800017fe, 0xc43b02eb, 0xc42302ec, 0xcf813245, 
0xce013246, - 0x52200020, 0x7fa3801a, 0x47b8020c, 0x15e00008, 0x1220000a, 0x2a206032, 0x513c001e, 0x7e3e001a, - 0xc4bf02e9, 0x9bc00005, 0xc43c000e, 0x2bfc0008, 0xcfc00013, 0x8000180f, 0xcd400013, 0xc4313267, - 0x1b3c0077, 0x1b300199, 0x7ff3000a, 0x1330000a, 0x2b300032, 0x043c3000, 0xcfc13267, 0xc43d3267, - 0xd200000b, 0xc4200007, 0xd3800002, 0xcf000002, 0xd8000040, 0x96000002, 0xd8400040, 0xd8400018, - 0x043c2000, 0xcfc13267, 0xd8000018, 0xd8800010, 0xcdc00013, 0x7dc30001, 0xdc1e0000, 0x04380032, - 0xcf80000e, 0x8c001427, 0xcc413248, 0xc43d3269, 0x27fc000f, 0x33fc0003, 0x97c00011, 0x043c001f, - 0xdfc30000, 0xd4413249, 0x7c43c001, 0x7c43c001, 0x043c0024, 0x0bfc0021, 0xdfc30000, 0xd441326a, - 0x173c0008, 0x1b300303, 0x7f3f0001, 0x043c0001, 0x7ff3c004, 0xcfc13084, 0x80001842, 0x043c0024, - 0xdfc30000, 0xd4413249, 0x7c43c001, 0x23fc003f, 0xcfc1326d, 0x0bb80026, 0xdf830000, 0xd441326e, - 0x7c438001, 0x7c438001, 0xc4393265, 0x1fb8ffc6, 0xddc30000, 0xcf813265, 0x9a000003, 0xcdc0000c, - 0x80001852, 0xcdc0000d, 0xce000010, 0x8c00142b, 0x90000000, 0x7c41c001, 0x7c420001, 0xcdc13252, - 0xce013253, 0x8c001628, 0x80001878, 0xc49f02e9, 0x99c00018, 0x7c41c001, 0x7c420001, 0xcdc13252, - 0xce013253, 0xc43c000e, 0x2bfc0008, 0xcfc00013, 0x043c3000, 0xcfc13267, 0xc43d3267, 0x97c0ffff, - 0xcdc00026, 0xce000026, 0xd8400027, 0xc41c0012, 0x99c0ffff, 0xc43c000e, 0x2bfc0008, 0xcfc00013, - 0x043c2000, 0xcfc13267, 0x8c001628, 0x80001878, 0xc41f02ed, 0xc42302ee, 0xcdc13252, 0xce013253, - 0x04200001, 0x7e2a0004, 0xce013084, 0x90000000, 0x28340001, 0x313c0bcc, 0x9bc00010, 0x393c051f, - 0x9bc00004, 0x3d3c050e, 0x9bc0000c, 0x97c0000c, 0x393c0560, 0x9bc00004, 0x3d3c054f, 0x9bc00007, - 0x97c00007, 0x393c1538, 0x9bc00005, 0x3d3c1537, 0x9bc00002, 0x97c00002, 0x2b740800, 0x90000000, - 0xc40c000e, 0x28cc0008, 0xccc00013, 0xc43d3265, 0x1bc800ea, 0x7c40c001, 0x18e8007c, 0x7c42c001, - 0x06a8189a, 0x86800000, 0x8000189e, 0x800018c5, 0x800018f2, 0x8000016a, 0x7c414001, 0x18d0007e, - 0x50580020, 0x09200001, 0x7d59401a, 0xd1400072, 0xc8140072, 0x09240002, 0x7c418001, 0x7c41c001, - 0x99000011, 0xc4340004, 0xd8400013, 0xd8400008, 0xc42130b5, 0x1a24002c, 0x9a40fffe, 0x2020002c, - 0xc418000d, 0x1198001c, 0x10cc0004, 0x14cc0004, 0x7cd8c00a, 0xccc130b7, 0xce0130b5, 0xcf400008, - 0x80000168, 0xd1400025, 0x5978073a, 0x2bb80002, 0xcf800024, 0xcd800026, 0xcdc00026, 0xd8400027, - 0x9600e8a8, 0xc4300012, 0x9b00ffff, 0x9640e8a5, 0x800018a9, 0x04140000, 0xc55b0309, 0x3d5c0010, - 0x05540001, 0x2598ffff, 0x09780001, 0x7dad800c, 0x99c0ffd2, 0x9580fff9, 0xc4970258, 0xc4930250, - 0x51540020, 0x7d15001a, 0x04140020, 0x04280000, 0x442c0000, 0x65180001, 0x09540001, 0x55100001, - 0x9580000b, 0x8c001628, 0xc41d3248, 0x04300001, 0x7f2b0014, 0x25dc000f, 0x7df9c00c, 0x95c00004, - 0x7ef2c01a, 0xd8c13260, 0xd901325d, 0x06a80001, 0x9940fff1, 0x04140020, 0x04280000, 0x66d80001, - 0x09540001, 0x56ec0001, 0x95800005, 0x8c001628, 0xc421325d, 0x26240007, 0x9a40fffe, 0x06a80001, - 0x9940fff7, 0x8000189e, 0x04140020, 0x04280000, 0x09540001, 0x8c001628, 0xc41d3254, 0xc023007f, - 0x19e4003e, 0x7de1c009, 0x7dee000c, 0x96400008, 0x96000007, 0xd8c13260, 0xd901325d, 0xc421325d, - 0x261c0007, 0x99c0fffe, 0x8000189e, 0x06a80001, 0x9940fff0, 0x8000189e, 0xc40c000e, 0x28cc0008, - 0xccc00013, 0xc43d3265, 0x1bc800ea, 0x7c40c001, 0x18e00064, 0x06281911, 0x14f4001d, 0x24cc0003, - 0x86800000, 0x80001915, 0x800019af, 0x80001a2b, 0x8000016a, 0xcc48032b, 0xcc480333, 0xcc48033b, - 0xcc480343, 0x98800011, 0xc4213246, 0xc4253245, 0x52200020, 0x7e26401a, 0x46640400, 0xc4313267, - 0x04203000, 
0xce013267, 0xc4213267, 0x9a000001, 0x1b3c0057, 0x1b200213, 0x1b300199, 0x7e3e000a, - 0x7e32000a, 0xce000024, 0xc4970258, 0xc4930250, 0x51540020, 0x7d15001a, 0xc4af0280, 0xc4b30278, - 0x52ec0020, 0x7ef2c01a, 0x04180000, 0x04140020, 0x04280000, 0x7f438001, 0x8c001628, 0xc41d3247, - 0x25dc0001, 0x95c00068, 0xc4213254, 0x1a1c003e, 0x95c00065, 0xc01f007f, 0x7e1e0009, 0x97800062, - 0x0bb80001, 0x43bc0008, 0x7fcbc001, 0xc7df032b, 0x7e1fc00c, 0x97c0fffa, 0x043c0101, 0x94c00002, - 0x043c0102, 0xc439325b, 0x1bb0003f, 0x97000049, 0x1bb000e8, 0x33380003, 0x9b800046, 0x33300002, - 0x97000009, 0xc4393260, 0x1bb000e4, 0x33300004, 0x97000040, 0xc431325d, 0x27300010, 0x9b00fffe, - 0x80001994, 0x8c001628, 0xc033ffff, 0x2f3000ff, 0xc439325b, 0x7f3b0009, 0xcf01325b, 0xc439325b, - 0x27b800ff, 0x9b80fffe, 0xd8c00033, 0xc4300009, 0x27300008, 0x9700fffe, 0x19f003e6, 0x27380003, - 0x13b80004, 0x27300003, 0x13300003, 0x7fb38001, 0x19f000e8, 0x7fb38001, 0x13300001, 0x7fb38001, - 0x07b80002, 0xd8400013, 0x19f00064, 0x33300002, 0x97000009, 0x17b00005, 0x07300003, 0xcf012082, - 0xcc01203f, 0xd8400013, 0xcc01203f, 0x0b300003, 0x80001982, 0x17b00005, 0xcf012082, 0xcc01203f, - 0xd8400013, 0xcc01203f, 0x13300005, 0x7fb30002, 0xc4392083, 0x7fb38005, 0x27b80001, 0x9b80ffdf, - 0xd8c00034, 0xcdc00013, 0xc431325d, 0x27300010, 0x9b00fffe, 0xc439325b, 0x27b000ff, 0x9b00ffcb, - 0xcfc1325d, 0x2030007b, 0xcf01325b, 0x80001995, 0xcfc1325d, 0x04300001, 0x7f2b0014, 0x7ef2c01a, - 0x98800009, 0x41bc0007, 0x53fc0002, 0x7e7fc011, 0xd3c00025, 0xd8000026, 0xd8400027, 0xc43c0012, - 0x9bc0ffff, 0x653c0001, 0x7dbd8001, 0x06a80001, 0x09540001, 0x55100001, 0x9940ff8f, 0xc43c000e, - 0x2bfc0008, 0xcfc00013, 0x043c2000, 0xcfc13267, 0xd8080278, 0xd8080280, 0x80000168, 0x7c410001, - 0x04140000, 0xc55b0309, 0x3d5c0010, 0x2598ffff, 0x05540001, 0x7d91800c, 0x95c00003, 0xd4400078, - 0x80000168, 0x9580fff8, 0x09780001, 0xc4970258, 0xc4930250, 0x51540020, 0x7d15001a, 0xc4af0280, - 0xc4b30278, 0x52ec0020, 0x7ef2c01a, 0x04140020, 0x04280000, 0x65180001, 0x09540001, 0x55100001, - 0x9580005d, 0x8c001628, 0xc4253247, 0x26640001, 0x04200101, 0x96400058, 0x7dc24001, 0xc41d3248, - 0x25dc000f, 0x7df9c00c, 0x95c00053, 0x94c00002, 0x04200102, 0x7e41c001, 0xc425325b, 0x1a70003f, - 0x97000049, 0x1a7000e8, 0x33240003, 0x9a400046, 0x33300002, 0x9700000a, 0xc4253260, 0x1a7000e4, - 0x33300004, 0x97000040, 0xc431325d, 0x27300010, 0x9b00fffe, 0x80001a21, 0xcdc00013, 0xc033ffff, - 0x2f3000ff, 0xc425325b, 0x7f270009, 0xcf01325b, 0xc425325b, 0x266400ff, 0x9a40fffe, 0xd8c00033, - 0xc4300009, 0x27300008, 0x9700fffe, 0x19f003e6, 0x27240003, 0x12640004, 0x27300003, 0x13300003, - 0x7e724001, 0x19f000e8, 0x7e724001, 0x13300001, 0x7e724001, 0x06640002, 0xd8400013, 0x19f00064, - 0x33300002, 0x97000009, 0x16700005, 0x07300003, 0xcf012082, 0xcc01203f, 0xd8400013, 0xcc01203f, - 0x0b300003, 0x80001a0f, 0x16700005, 0xcf012082, 0xcc01203f, 0xd8400013, 0xcc01203f, 0x13300005, - 0x7e730002, 0xc4252083, 0x7e724005, 0x26640001, 0x9a40ffdf, 0xd8c00034, 0xcdc00013, 0xc431325d, - 0x27300010, 0x9b00fffe, 0xc425325b, 0x267000ff, 0x9b00ffca, 0xce01325d, 0x2030007b, 0xcf01325b, - 0x80001a22, 0xce01325d, 0x04300001, 0x7f2b0014, 0x7ef2c01a, 0x06a80001, 0x9940ff9f, 0xd4400078, - 0xd8080278, 0xd8080280, 0x80000168, 0x8c001a31, 0xd4400078, 0xd8080278, 0xd8080280, 0x7c408001, - 0x88000000, 0xc4213246, 0xc4253245, 0x52200020, 0x7e26401a, 0x46640400, 0xc4313267, 0x04203000, - 0xce013267, 0xc4213267, 0x9a000001, 0x1b180057, 0x1b200213, 0x1b300199, 0x7e1a000a, 0x7e32000a, - 0xce000024, 0xc4970258, 0xc4930250, 
0x51540020, 0x7d15001a, 0xc4af0280, 0xc4b30278, 0x52ec0020, - 0x7ef2c01a, 0x04140020, 0x04280000, 0x65180001, 0x95800060, 0x8c001628, 0xc4193247, 0x25980001, - 0x04200101, 0x94c00005, 0x30f00005, 0x04200005, 0x9b000002, 0x04200102, 0x95800056, 0xc439325b, - 0x1bb0003f, 0x97000049, 0x1bb000e8, 0x33380003, 0x9b800046, 0x33300002, 0x9700000a, 0xc4393260, - 0x1bb000e4, 0x33300004, 0x97000040, 0xc431325d, 0x27300010, 0x9b00fffe, 0x80001aa2, 0xcdc00013, - 0xc033ffff, 0x2f3000ff, 0xc439325b, 0x7f3b0009, 0xcf01325b, 0xc439325b, 0x27b800ff, 0x9b80fffe, - 0xd8c00033, 0xc4300009, 0x27300008, 0x9700fffe, 0x19f003e6, 0x27380003, 0x13b80004, 0x27300003, - 0x13300003, 0x7fb38001, 0x19f000e8, 0x7fb38001, 0x13300001, 0x7fb38001, 0x07b80002, 0xd8400013, - 0x19f00064, 0x33300002, 0x97000009, 0x17b00005, 0x07300003, 0xcf012082, 0xcc01203f, 0xd8400013, - 0xcc01203f, 0x0b300003, 0x80001a90, 0x17b00005, 0xcf012082, 0xcc01203f, 0xd8400013, 0xcc01203f, - 0x13300005, 0x7fb30002, 0xc4392083, 0x7fb38005, 0x27b80001, 0x9b80ffdf, 0xd8c00034, 0xcdc00013, - 0xc431325d, 0x27300010, 0x9b00fffe, 0xc439325b, 0x27b000ff, 0x9b00ffca, 0xce01325d, 0x2030007b, - 0xcf00325b, 0x80001aa3, 0xce01325d, 0x04300001, 0x7f2b0014, 0x7ef2c01a, 0xc49b02e9, 0x99800005, - 0xd2400025, 0x4664001c, 0xd8000026, 0xd8400027, 0x06a80001, 0x09540001, 0x55100001, 0x9940ff9c, - 0xc49b02e9, 0x99800008, 0xc430000e, 0x2b300008, 0xcf000013, 0x04302000, 0xcf013267, 0xc4313267, - 0x97000001, 0x90000000, 0x244c00ff, 0xcc4c0200, 0x7c408001, 0x88000000, 0xc44f0200, 0xc410000b, - 0xc414000c, 0x7d158010, 0x059cc000, 0xd8400013, 0xccdd0000, 0x7c408001, 0x88000000, 0xc40c0037, - 0x94c0ffff, 0xcc000049, 0xc40c003a, 0x94c0ffff, 0x7c40c001, 0x24d00001, 0x9500e69a, 0x18d0003b, - 0x18d40021, 0x99400006, 0xd840004a, 0xc40c003c, 0x94c0ffff, 0x14cc0001, 0x94c00028, 0xd8000033, - 0xc438000b, 0xc43c0009, 0x27fc0001, 0x97c0fffe, 0xd8400013, 0xd841c07f, 0xc43dc07f, 0x1bfc0078, - 0x7ffbc00c, 0x97c0fffd, 0x99000004, 0xc0120840, 0x282c0040, 0x80001ae8, 0xc0121841, 0x282c001a, - 0xcd01c07c, 0xcc01c07d, 0xcc01c08c, 0xcc01c079, 0xcc01c07e, 0x04200004, 0xcec0001b, 0xd8400021, - 0x0a200001, 0x9a00ffff, 0xc425c07f, 0x166c001f, 0x04200004, 0x9ac0fffb, 0xc434000f, 0x9b40ffff, - 0xd801c07f, 0xd8400013, 0xc425c07f, 0xce400078, 0xd8000034, 0x9940e66b, 0xd800004a, 0x7c408001, - 0x88000000, 0xc40c0036, 0x24d00001, 0x9900fffe, 0x18cc0021, 0xccc00047, 0xcc000046, 0xc40c0039, - 0x94c0ffff, 0xc40c003d, 0x98c0ffff, 0x7c40c001, 0x24d003ff, 0x18d47fea, 0x18d87ff4, 0xcd00004c, - 0xcd40004e, 0xcd80004d, 0xd8400013, 0xcd41c405, 0xc02a0001, 0x2aa80001, 0xce800013, 0xcd01c406, - 0xcc01c406, 0xcc01c406, 0xc40c0006, 0x98c0ffff, 0xc414000e, 0x29540008, 0x295c0001, 0xcd400013, - 0xd8c1325e, 0xcdc0001a, 0x11980002, 0x4110000c, 0xc0160800, 0x7d15000a, 0xc0164010, 0xd8400013, - 0xcd41c078, 0xcc01c080, 0xcc01c081, 0xcd81c082, 0xcc01c083, 0xcd01c084, 0xc40c0006, 0x98c0ffff, - 0xd8400048, 0xc40c003b, 0x94c0ffff, 0x80000c16, 0xd8400013, 0xd801c40a, 0xd901c40d, 0xd801c410, - 0xd801c40e, 0xd801c40f, 0xc40c0040, 0x04140001, 0x09540001, 0x9940ffff, 0x04140096, 0xd8400013, - 0xccc1c400, 0xc411c401, 0x9500fffa, 0xc424003e, 0x04d00001, 0x11100002, 0xcd01c40c, 0xc0180034, - 0xcd81c411, 0xd841c414, 0x0a540001, 0xcd41c412, 0x2468000f, 0xc419c416, 0x41980003, 0xc41c003f, - 0x7dda0001, 0x12200002, 0x10cc0002, 0xccc1c40c, 0xd901c411, 0xce41c412, 0xd8800013, 0xce292e40, - 0xcc412e01, 0xcc412e02, 0xcc412e03, 0xcc412e00, 0x80000aa7, 0xc43c0007, 0xdc120000, 0x31144000, - 0x95400005, 0xdc030000, 0xd800002a, 0xcc3c000c, 0x80001b70, 
0x33f80003, 0xd4400078, 0x9780e601, - 0x188cfff0, 0x04e40002, 0x80001190, 0x7c408001, 0x88000000, 0xc424005e, 0x96400006, 0x90000000, - 0xc424005e, 0x96400003, 0x7c408001, 0x88000000, 0x80001b74, 0x80000168, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 
0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0x92100004, 0x92110501, 0x92120206, 0x92130703, 0x92100400, 0x92110105, 0x92120602, 0x92130307, - 0xbf810000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - }, - .dfy_size = 7440 -}; - -static const PWR_DFY_Section pwr_virus_section4 = { - .dfy_cntl = 0x80000004, - .dfy_addr_hi = 0x000000b4, - .dfy_addr_lo = 0x54106500, - .dfy_data = { - 0x7e000200, 0x7e020204, 0xc00a0505, 0x00000000, 0xbf8c007f, 0xb8900904, 0xb8911a04, 0xb8920304, - 0xb8930b44, 0x921c0d0c, 0x921c1c13, 0x921d0c12, 0x811c1d1c, 0x811c111c, 0x921cff1c, 0x00000400, - 0x921dff10, 0x00000100, 0x81181d1c, 0x7e040218, 0xe0701000, 0x80050002, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0701000, 0x80050102, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0701000, 0x80050002, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0701000, 0x80050102, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 
0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0701000, 0x80050002, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0701000, 0x80050102, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, 0xe0501000, 0x80050302, - 0xbf810000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - }, - .dfy_size = 240 -}; - -static const PWR_DFY_Section pwr_virus_section5 = { - .dfy_cntl = 0x80000004, - .dfy_addr_hi = 0x000000b4, - .dfy_addr_lo = 0x54106900, - .dfy_data = { - 0x7e080200, 0x7e100204, 0xbefc00ff, 0x00010000, 0x24200087, 0x262200ff, 0x000001f0, 0x20222282, - 0x28182111, 0xd81a0000, 0x0000040c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, - 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, - 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000080c, 
0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, - 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, - 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, 0x0000040c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd81a0000, - 0x0000080c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, 0x1100000c, 0xd86c0000, - 0x1100000c, 0xbf810000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - }, - .dfy_size = 384 -}; - -static const PWR_DFY_Section pwr_virus_section6 = { - .dfy_cntl = 0x80000004, - .dfy_addr_hi = 0x000000b4, - .dfy_addr_lo = 0x54116f00, - .dfy_data = { - 0xc0310800, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000040, 0x00000001, 0x00000001, 0x00000001, 0x00000000, 0xb4540fe8, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000041, 0x0000000c, 0x00000000, 0x07808000, 0xffffffff, - 0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 
0x00000000, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0x55555555, 0x55555555, 0x55555555, - 0x55555555, 0x00000000, 0x00000000, 0x540fee40, 0x000000b4, 0x00000010, 0x00000001, 0x00000004, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x54116f00, 0x000000b4, 0x00000000, 0x00000000, 0x00005301, 0x00000000, 0x00000000, 0x00000000, - 0xb4540fef, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x540fee20, 0x000000b4, 0x00000000, - 0x00000000, 0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0xc0310800, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000040, 0x00000001, 0x00000001, 0x00000001, 0x00000000, 0xb454105e, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x000000c0, 0x00000010, 0x00000000, 0x07808000, 0xffffffff, - 0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0x55555555, 0x55555555, 0x55555555, - 0x55555555, 0x00000000, 0x00000000, 0x540fee40, 0x000000b4, 0x00000010, 0x00000001, 0x00000004, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x54117300, 0x000000b4, 0x00000000, 0x00000000, 0x00005301, 0x00000000, 0x00000000, 0x00000000, - 0xb4540fef, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x540fee20, 0x000000b4, 0x00000000, - 0x00000000, 0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0xc0310800, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000040, 0x00000001, 0x00000001, 0x00000001, 0x00000000, 0xb4541065, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000500, 0x0000001c, 0x00000000, 0x07808000, 0xffffffff, - 0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0x55555555, 0x55555555, 0x55555555, - 0x55555555, 0x00000000, 0x00000000, 0x540fee40, 0x000000b4, 0x00000010, 0x00000001, 0x00000004, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x54117700, 0x000000b4, 0x00000000, 0x00000000, 
0x00005301, 0x00000000, 0x00000000, 0x00000000, - 0xb4540fef, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x540fee20, 0x000000b4, 0x00000000, - 0x00000000, 0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0xc0310800, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000040, 0x00000001, 0x00000001, 0x00000001, 0x00000000, 0xb4541069, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000444, 0x0000008a, 0x00000000, 0x07808000, 0xffffffff, - 0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0x55555555, 0x55555555, 0x55555555, - 0x55555555, 0x00000000, 0x00000000, 0x540fee40, 0x000000b4, 0x00000010, 0x00000001, 0x00000004, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x54117b00, 0x000000b4, 0x00000000, 0x00000000, 0x00005301, 0x00000000, 0x00000000, 0x00000000, - 0xb4540fef, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x540fee20, 0x000000b4, 0x00000000, - 0x00000000, 0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - }, - .dfy_size = 1024 -}; - -static const PWR_Command_Table PwrVirusTable_post[] = { - { 0x00000000, mmCP_MEC_CNTL }, - { 0x00000000, mmCP_MEC_CNTL }, - { 0x00000004, mmSRBM_GFX_CNTL }, - { 0x54116f00, mmCP_MQD_BASE_ADDR }, - { 0x000000b4, mmCP_MQD_BASE_ADDR_HI }, - { 0xb4540fef, mmCP_HQD_PQ_BASE }, - { 0x00000000, mmCP_HQD_PQ_BASE_HI }, - { 0x540fee20, mmCP_HQD_PQ_WPTR_POLL_ADDR }, - { 0x000000b4, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI }, - { 0x00005301, mmCP_HQD_PERSISTENT_STATE }, - { 0x00010000, mmCP_HQD_VMID }, - { 0xc8318509, mmCP_HQD_PQ_CONTROL }, - { 0x00000005, mmSRBM_GFX_CNTL }, - { 0x54117300, mmCP_MQD_BASE_ADDR }, - { 0x000000b4, mmCP_MQD_BASE_ADDR_HI }, - { 0xb4540fef, mmCP_HQD_PQ_BASE }, - { 0x00000000, mmCP_HQD_PQ_BASE_HI }, - { 0x540fee20, mmCP_HQD_PQ_WPTR_POLL_ADDR }, - { 0x000000b4, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI }, - { 0x00005301, mmCP_HQD_PERSISTENT_STATE }, - { 0x00010000, mmCP_HQD_VMID }, - { 0xc8318509, mmCP_HQD_PQ_CONTROL }, - { 0x00000006, mmSRBM_GFX_CNTL }, - { 0x54117700, mmCP_MQD_BASE_ADDR }, - { 0x000000b4, mmCP_MQD_BASE_ADDR_HI }, - { 0xb4540fef, mmCP_HQD_PQ_BASE }, - { 0x00000000, mmCP_HQD_PQ_BASE_HI }, - { 0x540fee20, mmCP_HQD_PQ_WPTR_POLL_ADDR }, - { 0x000000b4, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI }, - { 0x00005301, mmCP_HQD_PERSISTENT_STATE }, - { 0x00010000, mmCP_HQD_VMID }, - { 0xc8318509, mmCP_HQD_PQ_CONTROL }, - { 0x00000007, mmSRBM_GFX_CNTL }, - { 0x54117b00, mmCP_MQD_BASE_ADDR }, - { 0x000000b4, mmCP_MQD_BASE_ADDR_HI }, - { 0xb4540fef, mmCP_HQD_PQ_BASE }, - { 0x00000000, mmCP_HQD_PQ_BASE_HI }, - { 0x540fee20, mmCP_HQD_PQ_WPTR_POLL_ADDR }, - { 0x000000b4, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI }, - { 0x00005301, mmCP_HQD_PERSISTENT_STATE }, - { 0x00010000, mmCP_HQD_VMID }, - { 0xc8318509, mmCP_HQD_PQ_CONTROL }, - { 0x00000004, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000104, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000204, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000304, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, 
mmCP_HQD_ACTIVE }, - { 0x00000404, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000504, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000604, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000704, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000005, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000105, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000205, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000305, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000405, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000505, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000605, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000705, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000006, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000106, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000206, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000306, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000406, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000506, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000606, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000706, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, 
mmCP_HQD_ACTIVE }, - { 0x00000007, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000107, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000207, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000307, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000407, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000507, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000607, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000707, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000008, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000108, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000208, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000308, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000408, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000508, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000608, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000708, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000009, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000109, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000209, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000309, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, 
mmCP_HQD_ACTIVE }, - { 0x00000409, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000509, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000609, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000709, mmSRBM_GFX_CNTL }, - { 0x00000000, mmCP_HQD_ACTIVE }, - { 0x00000000, mmCP_HQD_PQ_RPTR }, - { 0x00000000, mmCP_HQD_PQ_WPTR }, - { 0x00000001, mmCP_HQD_ACTIVE }, - { 0x00000004, mmSRBM_GFX_CNTL }, - { 0x01010101, mmCP_PQ_WPTR_POLL_CNTL1 }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, mmGRBM_STATUS }, - { 0x00000000, 0xffffffff }, -}; - -#endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 126b44d47a99..004a40e88bde 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -363,6 +363,12 @@ struct pp_hwmgr_func { int (*set_active_display_count)(struct pp_hwmgr *hwmgr, uint32_t count); int (*set_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*start_thermal_controller)(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range); + int (*notify_cac_buffer_info)(struct pp_hwmgr *hwmgr, + uint32_t virtual_addr_low, + uint32_t virtual_addr_hi, + uint32_t mc_addr_low, + uint32_t mc_addr_hi, + uint32_t size); }; struct pp_table_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu72.h b/drivers/gpu/drm/amd/powerplay/inc/smu72.h index b73d6b59ac32..08cd70c75d8b 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu72.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu72.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef SMU72_H #define SMU72_H diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h index 98f76e925e65..b2edbc0c3c4d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef SMU72_DISCRETE_H #define SMU72_DISCRETE_H diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 7c9aba81cd6a..b1b27b2128f6 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -75,6 +75,11 @@ enum SMU_MEMBER { VceBootLevel, SamuBootLevel, LowSclkInterruptThreshold, + DRAM_LOG_ADDR_H, + DRAM_LOG_ADDR_L, + DRAM_LOG_PHY_ADDR_H, + DRAM_LOG_PHY_ADDR_L, + DRAM_LOG_BUFF_SIZE, }; diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h index cb070ebc7de1..247c97397a27 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h @@ -124,12 +124,15 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_NumOfDisplays 0x56 #define PPSMC_MSG_ReadSerialNumTop32 0x58 #define PPSMC_MSG_ReadSerialNumBottom32 0x59 +#define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x5A +#define PPSMC_MSG_SetSystemVirtualDramAddrLow 0x5B #define PPSMC_MSG_RunAcgBtc 0x5C #define PPSMC_MSG_RunAcgInClosedLoop 0x5D #define PPSMC_MSG_RunAcgInOpenLoop 0x5E #define PPSMC_MSG_InitializeAcg 0x5F #define PPSMC_MSG_GetCurrPkgPwr 0x61 -#define 
PPSMC_Message_Count 0x62 +#define PPSMC_MSG_UpdatePkgPwrPidAlpha 0x68 +#define PPSMC_Message_Count 0x69 typedef int PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile index 4e2988825ff6..30d3089d7dba 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the 'smu manager' sub-component of powerplay. # It provides the smu management services for the driver. -SMU_MGR = smumgr.o cz_smumgr.o tonga_smumgr.o fiji_smumgr.o fiji_smc.o \ - polaris10_smumgr.o iceland_smumgr.o polaris10_smc.o tonga_smc.o \ - smu7_smumgr.o iceland_smc.o vega10_smumgr.o rv_smumgr.o ci_smc.o +SMU_MGR = smumgr.o cz_smumgr.o tonga_smumgr.o fiji_smumgr.o \ + polaris10_smumgr.o iceland_smumgr.o \ + smu7_smumgr.o vega10_smumgr.o rv_smumgr.o ci_smumgr.o AMD_PP_SMUMGR = $(addprefix $(AMD_PP_PATH)/smumgr/,$(SMU_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 0017b9e62404..4d672cd15785 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2266,6 +2266,16 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU7_SoftRegisters, PreVBlankGap); case VBlankTimeout: return offsetof(SMU7_SoftRegisters, VBlankTimeout); + case DRAM_LOG_ADDR_H: + return offsetof(SMU7_SoftRegisters, DRAM_LOG_ADDR_H); + case DRAM_LOG_ADDR_L: + return offsetof(SMU7_SoftRegisters, DRAM_LOG_ADDR_L); + case DRAM_LOG_PHY_ADDR_H: + return offsetof(SMU7_SoftRegisters, DRAM_LOG_PHY_ADDR_H); + case DRAM_LOG_PHY_ADDR_L: + return offsetof(SMU7_SoftRegisters, DRAM_LOG_PHY_ADDR_L); + case DRAM_LOG_BUFF_SIZE: + return offsetof(SMU7_SoftRegisters, DRAM_LOG_BUFF_SIZE); } case SMU_Discrete_DpmTable: switch (member) { diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c deleted file mode 100644 index b1a66b5ada4a..000000000000 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c +++ /dev/null @@ -1,2486 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "pp_debug.h" -#include "fiji_smc.h" -#include "smu7_dyn_defaults.h" - -#include "smu7_hwmgr.h" -#include "hardwaremanager.h" -#include "ppatomctrl.h" -#include "cgs_common.h" -#include "atombios.h" -#include "fiji_smumgr.h" -#include "pppcielanes.h" -#include "smu7_ppsmc.h" -#include "smu73.h" -#include "smu/smu_7_1_3_d.h" -#include "smu/smu_7_1_3_sh_mask.h" -#include "gmc/gmc_8_1_d.h" -#include "gmc/gmc_8_1_sh_mask.h" -#include "bif/bif_5_0_d.h" -#include "bif/bif_5_0_sh_mask.h" -#include "dce/dce_10_0_d.h" -#include "dce/dce_10_0_sh_mask.h" -#include "smu7_smumgr.h" - -#define VOLTAGE_SCALE 4 -#define POWERTUNE_DEFAULT_SET_MAX 1 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 -#define VDDC_VDDCI_DELTA 300 -#define MC_CG_ARB_FREQ_F1 0x0b - -/* [2.5%,~2.5%] Clock stretched is multiple of 2.5% vs - * not and [Fmin, Fmax, LDO_REFSEL, USE_FOR_LOW_FREQ] - */ -static const uint16_t fiji_clock_stretcher_lookup_table[2][4] = { - {600, 1050, 3, 0}, {600, 1050, 6, 1} }; - -/* [FF, SS] type, [] 4 voltage ranges, and - * [Floor Freq, Boundary Freq, VID min , VID max] - */ -static const uint32_t fiji_clock_stretcher_ddt_table[2][4][4] = { - { {265, 529, 120, 128}, {325, 650, 96, 119}, {430, 860, 32, 95}, {0, 0, 0, 31} }, - { {275, 550, 104, 112}, {319, 638, 96, 103}, {360, 720, 64, 95}, {384, 768, 32, 63} } }; - -/* [Use_For_Low_freq] value, [0%, 5%, 10%, 7.14%, 14.28%, 20%] - * (coming from PWR_CKS_CNTL.stretch_amount reg spec) - */ -static const uint8_t fiji_clock_stretch_amount_conversion[2][6] = { - {0, 1, 3, 2, 4, 5}, {0, 2, 4, 5, 6, 5} }; - -static const struct fiji_pt_defaults fiji_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { - /*sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc */ - {1, 0xF, 0xFD, - /* TDC_MAWt, TdcWaterfallCtl, DTEAmbientTempBase */ - 0x19, 5, 45} -}; - -/* PPGen has the gain setting generated in x * 100 unit - * This function is to convert the unit to x * 4096(0x1000) unit. 
- * This is the unit expected by SMC firmware - */ -static int fiji_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table, - uint32_t clock, uint32_t *voltage, uint32_t *mvdd) -{ - uint32_t i; - uint16_t vddci; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - *voltage = *mvdd = 0; - - - /* clock - voltage dependency table is empty table */ - if (dep_table->count == 0) - return -EINVAL; - - for (i = 0; i < dep_table->count; i++) { - /* find first sclk bigger than request */ - if (dep_table->entries[i].clk >= clock) { - *voltage |= (dep_table->entries[i].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) - *voltage |= (data->vbios_boot_state.vddci_bootup_value * - VOLTAGE_SCALE) << VDDCI_SHIFT; - else if (dep_table->entries[i].vddci) - *voltage |= (dep_table->entries[i].vddci * - VOLTAGE_SCALE) << VDDCI_SHIFT; - else { - vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), - (dep_table->entries[i].vddc - - VDDC_VDDCI_DELTA)); - *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - } - - if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) - *mvdd = data->vbios_boot_state.mvdd_bootup_value * - VOLTAGE_SCALE; - else if (dep_table->entries[i].mvdd) - *mvdd = (uint32_t) dep_table->entries[i].mvdd * - VOLTAGE_SCALE; - - *voltage |= 1 << PHASES_SHIFT; - return 0; - } - } - - /* sclk is bigger than max sclk in the dependence table */ - *voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - - if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) - *voltage |= (data->vbios_boot_state.vddci_bootup_value * - VOLTAGE_SCALE) << VDDCI_SHIFT; - else if (dep_table->entries[i-1].vddci) { - vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), - (dep_table->entries[i].vddc - - VDDC_VDDCI_DELTA)); - *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - } - - if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) - *mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE; - else if (dep_table->entries[i].mvdd) - *mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE; - - return 0; -} - - -static uint16_t scale_fan_gain_settings(uint16_t raw_setting) -{ - uint32_t tmp; - tmp = raw_setting * 4096 / 100; - return (uint16_t)tmp; -} - -static void get_scl_sda_value(uint8_t line, uint8_t *scl, uint8_t *sda) -{ - switch (line) { - case SMU7_I2CLineID_DDC1: - *scl = SMU7_I2C_DDC1CLK; - *sda = SMU7_I2C_DDC1DATA; - break; - case SMU7_I2CLineID_DDC2: - *scl = SMU7_I2C_DDC2CLK; - *sda = SMU7_I2C_DDC2DATA; - break; - case SMU7_I2CLineID_DDC3: - *scl = SMU7_I2C_DDC3CLK; - *sda = SMU7_I2C_DDC3DATA; - break; - case SMU7_I2CLineID_DDC4: - *scl = SMU7_I2C_DDC4CLK; - *sda = SMU7_I2C_DDC4DATA; - break; - case SMU7_I2CLineID_DDC5: - *scl = SMU7_I2C_DDC5CLK; - *sda = SMU7_I2C_DDC5DATA; - break; - case SMU7_I2CLineID_DDC6: - *scl = SMU7_I2C_DDC6CLK; - *sda = SMU7_I2C_DDC6DATA; - break; - case SMU7_I2CLineID_SCLSDA: - *scl = SMU7_I2C_SCL; - *sda = SMU7_I2C_SDA; - break; - case SMU7_I2CLineID_DDCVGA: - *scl = SMU7_I2C_DDCVGACLK; - *sda = SMU7_I2C_DDCVGADATA; - break; - default: - *scl = 0; - *sda = 0; - break; - } -} - -static void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - if (table_info && - table_info->cac_dtp_table->usPowerTuneDataSetID <= 
POWERTUNE_DEFAULT_SET_MAX && - table_info->cac_dtp_table->usPowerTuneDataSetID) - smu_data->power_tune_defaults = - &fiji_power_tune_data_set_array - [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; - else - smu_data->power_tune_defaults = &fiji_power_tune_data_set_array[0]; - -} - -static int fiji_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) -{ - - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; - - SMU73_Discrete_DpmTable *dpm_table = &(smu_data->smc_state_table); - - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; - struct pp_advance_fan_control_parameters *fan_table = - &hwmgr->thermal_controller.advanceFanControlParameters; - uint8_t uc_scl, uc_sda; - - /* TDP number of fraction bits are changed from 8 to 7 for Fiji - * as requested by SMC team - */ - dpm_table->DefaultTdp = PP_HOST_TO_SMC_US( - (uint16_t)(cac_dtp_table->usTDP * 128)); - dpm_table->TargetTdp = PP_HOST_TO_SMC_US( - (uint16_t)(cac_dtp_table->usTDP * 128)); - - PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, - "Target Operating Temp is out of Range!", - ); - - dpm_table->GpuTjMax = (uint8_t)(cac_dtp_table->usTargetOperatingTemp); - dpm_table->GpuTjHyst = 8; - - dpm_table->DTEAmbientTempBase = defaults->DTEAmbientTempBase; - - /* The following are for new Fiji Multi-input fan/thermal control */ - dpm_table->TemperatureLimitEdge = PP_HOST_TO_SMC_US( - cac_dtp_table->usTargetOperatingTemp * 256); - dpm_table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US( - cac_dtp_table->usTemperatureLimitHotspot * 256); - dpm_table->TemperatureLimitLiquid1 = PP_HOST_TO_SMC_US( - cac_dtp_table->usTemperatureLimitLiquid1 * 256); - dpm_table->TemperatureLimitLiquid2 = PP_HOST_TO_SMC_US( - cac_dtp_table->usTemperatureLimitLiquid2 * 256); - dpm_table->TemperatureLimitVrVddc = PP_HOST_TO_SMC_US( - cac_dtp_table->usTemperatureLimitVrVddc * 256); - dpm_table->TemperatureLimitVrMvdd = PP_HOST_TO_SMC_US( - cac_dtp_table->usTemperatureLimitVrMvdd * 256); - dpm_table->TemperatureLimitPlx = PP_HOST_TO_SMC_US( - cac_dtp_table->usTemperatureLimitPlx * 256); - - dpm_table->FanGainEdge = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainEdge)); - dpm_table->FanGainHotspot = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainHotspot)); - dpm_table->FanGainLiquid = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainLiquid)); - dpm_table->FanGainVrVddc = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainVrVddc)); - dpm_table->FanGainVrMvdd = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainVrMvdd)); - dpm_table->FanGainPlx = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainPlx)); - dpm_table->FanGainHbm = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainHbm)); - - dpm_table->Liquid1_I2C_address = cac_dtp_table->ucLiquid1_I2C_address; - dpm_table->Liquid2_I2C_address = cac_dtp_table->ucLiquid2_I2C_address; - dpm_table->Vr_I2C_address = cac_dtp_table->ucVr_I2C_address; - dpm_table->Plx_I2C_address = cac_dtp_table->ucPlx_I2C_address; - - get_scl_sda_value(cac_dtp_table->ucLiquid_I2C_Line, &uc_scl, &uc_sda); - dpm_table->Liquid_I2C_LineSCL = uc_scl; - dpm_table->Liquid_I2C_LineSDA = uc_sda; - - get_scl_sda_value(cac_dtp_table->ucVr_I2C_Line, &uc_scl, &uc_sda); - dpm_table->Vr_I2C_LineSCL = uc_scl; - 
dpm_table->Vr_I2C_LineSDA = uc_sda; - - get_scl_sda_value(cac_dtp_table->ucPlx_I2C_Line, &uc_scl, &uc_sda); - dpm_table->Plx_I2C_LineSCL = uc_scl; - dpm_table->Plx_I2C_LineSDA = uc_sda; - - return 0; -} - - -static int fiji_populate_svi_load_line(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; - - smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn; - smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC; - smu_data->power_tune_table.SviLoadLineTrimVddC = 3; - smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; - - return 0; -} - - -static int fiji_populate_tdc_limit(struct pp_hwmgr *hwmgr) -{ - uint16_t tdc_limit; - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; - - /* TDC number of fraction bits are changed from 8 to 7 - * for Fiji as requested by SMC team - */ - tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128); - smu_data->power_tune_table.TDC_VDDC_PkgLimit = - CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); - smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = - defaults->TDC_VDDC_ThrottleReleaseLimitPerc; - smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt; - - return 0; -} - -static int fiji_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; - uint32_t temp; - - if (smu7_read_smc_sram_dword(hwmgr, - fuse_table_offset + - offsetof(SMU73_Discrete_PmFuses, TdcWaterfallCtl), - (uint32_t *)&temp, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", - return -EINVAL); - else { - smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl; - smu_data->power_tune_table.LPMLTemperatureMin = - (uint8_t)((temp >> 16) & 0xff); - smu_data->power_tune_table.LPMLTemperatureMax = - (uint8_t)((temp >> 8) & 0xff); - smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff); - } - return 0; -} - -static int fiji_populate_temperature_scaler(struct pp_hwmgr *hwmgr) -{ - int i; - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - - /* Currently not used. Set all to zero. */ - for (i = 0; i < 16; i++) - smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; - - return 0; -} - -static int fiji_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - - if ((hwmgr->thermal_controller.advanceFanControlParameters. - usFanOutputSensitivity & (1 << 15)) || - 0 == hwmgr->thermal_controller.advanceFanControlParameters. - usFanOutputSensitivity) - hwmgr->thermal_controller.advanceFanControlParameters. - usFanOutputSensitivity = hwmgr->thermal_controller. - advanceFanControlParameters.usDefaultFanOutputSensitivity; - - smu_data->power_tune_table.FuzzyFan_PwmSetDelta = - PP_HOST_TO_SMC_US(hwmgr->thermal_controller. - advanceFanControlParameters.usFanOutputSensitivity); - return 0; -} - -static int fiji_populate_gnb_lpml(struct pp_hwmgr *hwmgr) -{ - int i; - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - - /* Currently not used. Set all to zero. 
*/ - for (i = 0; i < 16; i++) - smu_data->power_tune_table.GnbLPML[i] = 0; - - return 0; -} - -static int fiji_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint16_t HiSidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; - uint16_t LoSidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; - struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; - - HiSidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); - LoSidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); - - smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = - CONVERT_FROM_HOST_TO_SMC_US(HiSidd); - smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = - CONVERT_FROM_HOST_TO_SMC_US(LoSidd); - - return 0; -} - -static int fiji_populate_pm_fuses(struct pp_hwmgr *hwmgr) -{ - uint32_t pm_fuse_table_offset; - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment)) { - if (smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU73_Firmware_Header, PmFuseTable), - &pm_fuse_table_offset, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to get pm_fuse_table_offset Failed!", - return -EINVAL); - - /* DW6 */ - if (fiji_populate_svi_load_line(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate SviLoadLine Failed!", - return -EINVAL); - /* DW7 */ - if (fiji_populate_tdc_limit(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TDCLimit Failed!", return -EINVAL); - /* DW8 */ - if (fiji_populate_dw8(hwmgr, pm_fuse_table_offset)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TdcWaterfallCtl, " - "LPMLTemperature Min and Max Failed!", - return -EINVAL); - - /* DW9-DW12 */ - if (0 != fiji_populate_temperature_scaler(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate LPMLTemperatureScaler Failed!", - return -EINVAL); - - /* DW13-DW14 */ - if (fiji_populate_fuzzy_fan(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate Fuzzy Fan Control parameters Failed!", - return -EINVAL); - - /* DW15-DW18 */ - if (fiji_populate_gnb_lpml(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate GnbLPML Failed!", - return -EINVAL); - - /* DW20 */ - if (fiji_populate_bapm_vddc_base_leakage_sidd(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate BapmVddCBaseLeakage Hi and Lo " - "Sidd Failed!", return -EINVAL); - - if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, - (uint8_t *)&smu_data->power_tune_table, - sizeof(struct SMU73_Discrete_PmFuses), SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to download PmFuseTable Failed!", - return -EINVAL); - } - return 0; -} - -/** -* Preparation of vddc and vddgfx CAC tables for SMC. 
-* -* @param hwmgr the address of the hardware manager -* @param table the SMC DPM table structure to be populated -* @return always 0 -*/ -static int fiji_populate_cac_table(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_DpmTable *table) -{ - uint32_t count; - uint8_t index; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_voltage_lookup_table *lookup_table = - table_info->vddc_lookup_table; - /* tables is already swapped, so in order to use the value from it, - * we need to swap it back. - * We are populating vddc CAC data to BapmVddc table - * in split and merged mode - */ - - for (count = 0; count < lookup_table->count; count++) { - index = phm_get_voltage_index(lookup_table, - data->vddc_voltage_table.entries[count].value); - table->BapmVddcVidLoSidd[count] = - convert_to_vid(lookup_table->entries[index].us_cac_low); - table->BapmVddcVidHiSidd[count] = - convert_to_vid(lookup_table->entries[index].us_cac_high); - } - - return 0; -} - -/** -* Preparation of voltage tables for SMC. -* -* @param hwmgr the address of the hardware manager -* @param table the SMC DPM table structure to be populated -* @return always 0 -*/ - -static int fiji_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_DpmTable *table) -{ - int result; - - result = fiji_populate_cac_table(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "can not populate CAC voltage tables to SMC", - return -EINVAL); - - return 0; -} - -static int fiji_populate_ulv_level(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_Ulv *state) -{ - int result = 0; - - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - state->CcPwrDynRm = 0; - state->CcPwrDynRm1 = 0; - - state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; - state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * - VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); - - state->VddcPhase = 1; - - if (!result) { - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); - CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); - } - return result; -} - -static int fiji_populate_ulv_state(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_DpmTable *table) -{ - return fiji_populate_ulv_level(hwmgr, &table->Ulv); -} - -static int fiji_populate_smc_link_level(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - int i; - - /* Index (dpm_table->pcie_speed_table.count) - * is reserved for PCIE boot level. 
*/ - for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { - table->LinkLevel[i].PcieGenSpeed = - (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; - table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width( - dpm_table->pcie_speed_table.dpm_levels[i].param1); - table->LinkLevel[i].EnabledForActivity = 1; - table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff); - table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5); - table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30); - } - - smu_data->smc_state_table.LinkLevelCount = - (uint8_t)dpm_table->pcie_speed_table.count; - data->dpm_level_enable_mask.pcie_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); - - return 0; -} - - -/** -* Calculates the SCLK dividers using the provided engine clock -* -* @param hwmgr the address of the hardware manager -* @param clock the engine clock to use to populate the structure -* @param sclk the SMC SCLK structure to be populated -*/ -static int fiji_calculate_sclk_params(struct pp_hwmgr *hwmgr, - uint32_t clock, struct SMU73_Discrete_GraphicsLevel *sclk) -{ - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct pp_atomctrl_clock_dividers_vi dividers; - uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; - uint32_t spll_func_cntl_3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; - uint32_t spll_func_cntl_4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; - uint32_t cg_spll_spread_spectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; - uint32_t cg_spll_spread_spectrum_2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; - uint32_t ref_clock; - uint32_t ref_divider; - uint32_t fbdiv; - int result; - - /* get the engine clock dividers for this clock value */ - result = atomctrl_get_engine_pll_dividers_vi(hwmgr, clock, &dividers); - - PP_ASSERT_WITH_CODE(result == 0, - "Error retrieving Engine Clock dividers from VBIOS.", - return result); - - /* To get FBDIV we need to multiply this by 16384 and divide it by Fref.
*/ - ref_clock = atomctrl_get_reference_clock(hwmgr); - ref_divider = 1 + dividers.uc_pll_ref_div; - - /* low 14 bits is fraction and high 12 bits is divider */ - fbdiv = dividers.ul_fb_div.ul_fb_divider & 0x3FFFFFF; - - /* SPLL_FUNC_CNTL setup */ - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, - SPLL_REF_DIV, dividers.uc_pll_ref_div); - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, - SPLL_PDIV_A, dividers.uc_pll_post_div); - - /* SPLL_FUNC_CNTL_3 setup*/ - spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, CG_SPLL_FUNC_CNTL_3, - SPLL_FB_DIV, fbdiv); - - /* set to use fractional accumulation*/ - spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, CG_SPLL_FUNC_CNTL_3, - SPLL_DITHEN, 1); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_EngineSpreadSpectrumSupport)) { - struct pp_atomctrl_internal_ss_info ssInfo; - - uint32_t vco_freq = clock * dividers.uc_pll_post_div; - if (!atomctrl_get_engine_clock_spread_spectrum(hwmgr, - vco_freq, &ssInfo)) { - /* - * ss_info.speed_spectrum_percentage -- in unit of 0.01% - * ss_info.speed_spectrum_rate -- in unit of khz - * - * clks = reference_clock * 10 / (REFDIV + 1) / speed_spectrum_rate / 2 - */ - uint32_t clk_s = ref_clock * 5 / - (ref_divider * ssInfo.speed_spectrum_rate); - /* clkv = 2 * D * fbdiv / NS */ - uint32_t clk_v = 4 * ssInfo.speed_spectrum_percentage * - fbdiv / (clk_s * 10000); - - cg_spll_spread_spectrum = PHM_SET_FIELD(cg_spll_spread_spectrum, - CG_SPLL_SPREAD_SPECTRUM, CLKS, clk_s); - cg_spll_spread_spectrum = PHM_SET_FIELD(cg_spll_spread_spectrum, - CG_SPLL_SPREAD_SPECTRUM, SSEN, 1); - cg_spll_spread_spectrum_2 = PHM_SET_FIELD(cg_spll_spread_spectrum_2, - CG_SPLL_SPREAD_SPECTRUM_2, CLKV, clk_v); - } - } - - sclk->SclkFrequency = clock; - sclk->CgSpllFuncCntl3 = spll_func_cntl_3; - sclk->CgSpllFuncCntl4 = spll_func_cntl_4; - sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; - sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; - sclk->SclkDid = (uint8_t)dividers.pll_post_divider; - - return 0; -} - -/** -* Populates single SMC SCLK structure using the provided engine clock -* -* @param hwmgr the address of the hardware manager -* @param clock the engine clock to use to populate the structure -* @param sclk the SMC SCLK structure to be populated -*/ - -static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr, - uint32_t clock, uint16_t sclk_al_threshold, - struct SMU73_Discrete_GraphicsLevel *level) -{ - int result; - /* PP_Clocks minClocks; */ - uint32_t threshold, mvdd; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - result = fiji_calculate_sclk_params(hwmgr, clock, level); - - /* populate graphics levels */ - result = fiji_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, clock, - (uint32_t *)(&level->MinVoltage), &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "can not find VDDC voltage value for " - "VDDC engine clock dependency table", - return result); - - level->SclkFrequency = clock; - level->ActivityLevel = sclk_al_threshold; - level->CcPwrDynRm = 0; - level->CcPwrDynRm1 = 0; - level->EnabledForActivity = 0; - level->EnabledForThrottle = 1; - level->UpHyst = 10; - level->DownHyst = 0; - level->VoltageDownHyst = 0; - level->PowerThrottle = 0; - - threshold = clock * data->fast_watermark_threshold / 100; - - data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr; - - if 
(phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) - level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock, - hwmgr->display_config.min_core_set_clock_in_sr); - - - /* Default to slow, highest DPM level will be - * set to PPSMC_DISPLAY_WATERMARK_LOW later. - */ - level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage); - CONVERT_FROM_HOST_TO_SMC_UL(level->SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(level->CgSpllFuncCntl3); - CONVERT_FROM_HOST_TO_SMC_UL(level->CgSpllFuncCntl4); - CONVERT_FROM_HOST_TO_SMC_UL(level->SpllSpreadSpectrum); - CONVERT_FROM_HOST_TO_SMC_UL(level->SpllSpreadSpectrum2); - CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1); - - return 0; -} -/** -* Populates all SMC SCLK levels' structure based on the trimmed allowed dpm engine clock states -* -* @param hwmgr the address of the hardware manager -*/ -int fiji_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - - struct smu7_dpm_table *dpm_table = &data->dpm_table; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; - uint8_t pcie_entry_cnt = (uint8_t) data->dpm_table.pcie_speed_table.count; - int result = 0; - uint32_t array = smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, GraphicsLevel); - uint32_t array_size = sizeof(struct SMU73_Discrete_GraphicsLevel) * - SMU73_MAX_LEVELS_GRAPHICS; - struct SMU73_Discrete_GraphicsLevel *levels = - smu_data->smc_state_table.GraphicsLevel; - uint32_t i, max_entry; - uint8_t hightest_pcie_level_enabled = 0, - lowest_pcie_level_enabled = 0, - mid_pcie_level_enabled = 0, - count = 0; - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - result = fiji_populate_single_graphic_level(hwmgr, - dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], - &levels[i]); - if (result) - return result; - - /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ - if (i > 1) - levels[i].DeepSleepDivId = 0; - } - - /* Only enable level 0 for now.*/ - levels[0].EnabledForActivity = 1; - - /* set highest level watermark to high */ - levels[dpm_table->sclk_table.count - 1].DisplayWatermark = - PPSMC_DISPLAY_WATERMARK_HIGH; - - smu_data->smc_state_table.GraphicsDpmLevelCount = - (uint8_t)dpm_table->sclk_table.count; - data->dpm_level_enable_mask.sclk_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); - - if (pcie_table != NULL) { - PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt), - "There must be 1 or more PCIE levels defined in PPTable.", - return -EINVAL); - max_entry = pcie_entry_cnt - 1; - for (i = 0; i < dpm_table->sclk_table.count; i++) - levels[i].pcieDpmLevel = - (uint8_t) ((i < max_entry) ? 
i : max_entry); - } else { - while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && - ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << (hightest_pcie_level_enabled + 1))) != 0)) - hightest_pcie_level_enabled++; - - while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && - ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << lowest_pcie_level_enabled)) == 0)) - lowest_pcie_level_enabled++; - - while ((count < hightest_pcie_level_enabled) && - ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) - count++; - - mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) < - hightest_pcie_level_enabled ? - (lowest_pcie_level_enabled + 1 + count) : - hightest_pcie_level_enabled; - - /* set pcieDpmLevel to hightest_pcie_level_enabled */ - for (i = 2; i < dpm_table->sclk_table.count; i++) - levels[i].pcieDpmLevel = hightest_pcie_level_enabled; - - /* set pcieDpmLevel to lowest_pcie_level_enabled */ - levels[0].pcieDpmLevel = lowest_pcie_level_enabled; - - /* set pcieDpmLevel to mid_pcie_level_enabled */ - levels[1].pcieDpmLevel = mid_pcie_level_enabled; - } - /* level count will send to smc once at init smc table and never change */ - result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, - (uint32_t)array_size, SMC_RAM_END); - - return result; -} - - -/** - * MCLK Frequency Ratio - * SEQ_CG_RESP Bit[31:24] - 0x0 - * Bit[27:24] \96 DDR3 Frequency ratio - * 0x0 <= 100MHz, 450 < 0x8 <= 500MHz - * 100 < 0x1 <= 150MHz, 500 < 0x9 <= 550MHz - * 150 < 0x2 <= 200MHz, 550 < 0xA <= 600MHz - * 200 < 0x3 <= 250MHz, 600 < 0xB <= 650MHz - * 250 < 0x4 <= 300MHz, 650 < 0xC <= 700MHz - * 300 < 0x5 <= 350MHz, 700 < 0xD <= 750MHz - * 350 < 0x6 <= 400MHz, 750 < 0xE <= 800MHz - * 400 < 0x7 <= 450MHz, 800 < 0xF - */ -static uint8_t fiji_get_mclk_frequency_ratio(uint32_t mem_clock) -{ - if (mem_clock <= 10000) - return 0x0; - if (mem_clock <= 15000) - return 0x1; - if (mem_clock <= 20000) - return 0x2; - if (mem_clock <= 25000) - return 0x3; - if (mem_clock <= 30000) - return 0x4; - if (mem_clock <= 35000) - return 0x5; - if (mem_clock <= 40000) - return 0x6; - if (mem_clock <= 45000) - return 0x7; - if (mem_clock <= 50000) - return 0x8; - if (mem_clock <= 55000) - return 0x9; - if (mem_clock <= 60000) - return 0xa; - if (mem_clock <= 65000) - return 0xb; - if (mem_clock <= 70000) - return 0xc; - if (mem_clock <= 75000) - return 0xd; - if (mem_clock <= 80000) - return 0xe; - /* mem_clock > 800MHz */ - return 0xf; -} - -/** -* Populates the SMC MCLK structure using the provided memory clock -* -* @param hwmgr the address of the hardware manager -* @param clock the memory clock to use to populate the structure -* @param sclk the SMC SCLK structure to be populated -*/ -static int fiji_calculate_mclk_params(struct pp_hwmgr *hwmgr, - uint32_t clock, struct SMU73_Discrete_MemoryLevel *mclk) -{ - struct pp_atomctrl_memory_clock_param mem_param; - int result; - - result = atomctrl_get_memory_pll_dividers_vi(hwmgr, clock, &mem_param); - PP_ASSERT_WITH_CODE((0 == result), - "Failed to get Memory PLL Dividers.", - ); - - /* Save the result data to outpupt memory level structure */ - mclk->MclkFrequency = clock; - mclk->MclkDivider = (uint8_t)mem_param.mpll_post_divider; - mclk->FreqRange = fiji_get_mclk_frequency_ratio(clock); - - return result; -} - -static int fiji_populate_single_memory_level(struct pp_hwmgr *hwmgr, - uint32_t clock, struct SMU73_Discrete_MemoryLevel *mem_level) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr 
*)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - int result = 0; - uint32_t mclk_stutter_mode_threshold = 60000; - - if (table_info->vdd_dep_on_mclk) { - result = fiji_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, clock, - (uint32_t *)(&mem_level->MinVoltage), &mem_level->MinMvdd); - PP_ASSERT_WITH_CODE((0 == result), - "can not find MinVddc voltage value from memory " - "VDDC voltage dependency table", return result); - } - - mem_level->EnabledForThrottle = 1; - mem_level->EnabledForActivity = 0; - mem_level->UpHyst = 0; - mem_level->DownHyst = 100; - mem_level->VoltageDownHyst = 0; - mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; - mem_level->StutterEnable = false; - - mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - /* enable stutter mode if all the follow condition applied - * PECI_GetNumberOfActiveDisplays(hwmgr->pPECI, - * &(data->DisplayTiming.numExistingDisplays)); - */ - data->display_timing.num_existing_displays = 1; - - if (mclk_stutter_mode_threshold && - (clock <= mclk_stutter_mode_threshold) && - (!data->is_uvd_enabled) && - (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, - STUTTER_ENABLE) & 0x1)) - mem_level->StutterEnable = true; - - result = fiji_calculate_mclk_params(hwmgr, clock, mem_level); - if (!result) { - CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd); - CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency); - CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage); - } - return result; -} - -/** -* Populates all SMC MCLK levels' structure based on the trimmed allowed dpm memory clock states -* -* @param hwmgr the address of the hardware manager -*/ -int fiji_populate_all_memory_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - int result; - /* populate MCLK dpm table to SMU7 */ - uint32_t array = smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, MemoryLevel); - uint32_t array_size = sizeof(SMU73_Discrete_MemoryLevel) * - SMU73_MAX_LEVELS_MEMORY; - struct SMU73_Discrete_MemoryLevel *levels = - smu_data->smc_state_table.MemoryLevel; - uint32_t i; - - for (i = 0; i < dpm_table->mclk_table.count; i++) { - PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), - "can not populate memory level as memory clock is zero", - return -EINVAL); - result = fiji_populate_single_memory_level(hwmgr, - dpm_table->mclk_table.dpm_levels[i].value, - &levels[i]); - if (result) - return result; - } - - /* Only enable level 0 for now. */ - levels[0].EnabledForActivity = 1; - - /* in order to prevent MC activity from stutter mode to push DPM up. - * the UVD change complements this by putting the MCLK in - * a higher state by default such that we are not effected by - * up threshold or and MCLK DPM latency. 
- */ - levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target; - CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel); - - smu_data->smc_state_table.MemoryDpmLevelCount = - (uint8_t)dpm_table->mclk_table.count; - data->dpm_level_enable_mask.mclk_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); - /* set highest level watermark to high */ - levels[dpm_table->mclk_table.count - 1].DisplayWatermark = - PPSMC_DISPLAY_WATERMARK_HIGH; - - /* level count will send to smc once at init smc table and never change */ - result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, - (uint32_t)array_size, SMC_RAM_END); - - return result; -} - - -/** -* Populates the SMC MVDD structure using the provided memory clock. -* -* @param hwmgr the address of the hardware manager -* @param mclk the MCLK value to be used in the decision if MVDD should be high or low. -* @param voltage the SMC VOLTAGE structure to be populated -*/ -static int fiji_populate_mvdd_value(struct pp_hwmgr *hwmgr, - uint32_t mclk, SMIO_Pattern *smio_pat) -{ - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint32_t i = 0; - - if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { - /* find mvdd value which clock is more than request */ - for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { - if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { - smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value; - break; - } - } - PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, - "MVDD Voltage is outside the supported range.", - return -EINVAL); - } else - return -EINVAL; - - return 0; -} - -static int fiji_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, - SMU73_Discrete_DpmTable *table) -{ - int result = 0; - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct pp_atomctrl_clock_dividers_vi dividers; - SMIO_Pattern vol_level; - uint32_t mvdd; - uint16_t us_mvdd; - uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; - uint32_t spll_func_cntl_2 = data->clock_registers.vCG_SPLL_FUNC_CNTL_2; - - table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - - if (!data->sclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, - * already converted to SMC_UL */ - table->ACPILevel.SclkFrequency = - data->dpm_table.sclk_table.dpm_levels[0].value; - result = fiji_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, - table->ACPILevel.SclkFrequency, - (uint32_t *)(&table->ACPILevel.MinVoltage), &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDC voltage value " \ - "in Clock Dependency Table", - ); - } else { - table->ACPILevel.SclkFrequency = - data->vbios_boot_state.sclk_bootup_value; - table->ACPILevel.MinVoltage = - data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE; - } - - /* get the engine clock dividers for this clock value */ - result = atomctrl_get_engine_pll_dividers_vi(hwmgr, - table->ACPILevel.SclkFrequency, &dividers); - PP_ASSERT_WITH_CODE(result == 0, - "Error retrieving Engine Clock dividers from VBIOS.", - return result); - - table->ACPILevel.SclkDid = (uint8_t)dividers.pll_post_divider; - table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - table->ACPILevel.DeepSleepDivId = 0; - - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, -
SPLL_PWRON, 0); - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, - SPLL_RESET, 1); - spll_func_cntl_2 = PHM_SET_FIELD(spll_func_cntl_2, CG_SPLL_FUNC_CNTL_2, - SCLK_MUX_SEL, 4); - - table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; - table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; - table->ACPILevel.CgSpllFuncCntl3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; - table->ACPILevel.CgSpllFuncCntl4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; - table->ACPILevel.SpllSpreadSpectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; - table->ACPILevel.SpllSpreadSpectrum2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; - table->ACPILevel.CcPwrDynRm = 0; - table->ACPILevel.CcPwrDynRm1 = 0; - - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl2); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl3); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl4); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum2); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); - - if (!data->mclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ - table->MemoryACPILevel.MclkFrequency = - data->dpm_table.mclk_table.dpm_levels[0].value; - result = fiji_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, - table->MemoryACPILevel.MclkFrequency, - (uint32_t *)(&table->MemoryACPILevel.MinVoltage), &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDCI voltage value in Clock Dependency Table", - ); - } else { - table->MemoryACPILevel.MclkFrequency = - data->vbios_boot_state.mclk_bootup_value; - table->MemoryACPILevel.MinVoltage = - data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE; - } - - us_mvdd = 0; - if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) || - (data->mclk_dpm_key_disabled)) - us_mvdd = data->vbios_boot_state.mvdd_bootup_value; - else { - if (!fiji_populate_mvdd_value(hwmgr, - data->dpm_table.mclk_table.dpm_levels[0].value, - &vol_level)) - us_mvdd = vol_level.Voltage; - } - - table->MemoryACPILevel.MinMvdd = - PP_HOST_TO_SMC_UL(us_mvdd * VOLTAGE_SCALE); - - table->MemoryACPILevel.EnabledForThrottle = 0; - table->MemoryACPILevel.EnabledForActivity = 0; - table->MemoryACPILevel.UpHyst = 0; - table->MemoryACPILevel.DownHyst = 100; - table->MemoryACPILevel.VoltageDownHyst = 0; - table->MemoryACPILevel.ActivityLevel = - PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); - - table->MemoryACPILevel.StutterEnable = false; - CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage); - - return result; -} - -static int fiji_populate_smc_vce_level(struct pp_hwmgr *hwmgr, - SMU73_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - - table->VceLevelCount = (uint8_t)(mm_table->count); - table->VceBootLevel = 0; - - for (count = 0; count < table->VceLevelCount; 
count++) { - table->VceLevel[count].Frequency = mm_table->entries[count].eclk; - table->VceLevel[count].MinVoltage = 0; - table->VceLevel[count].MinVoltage |= - (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - table->VceLevel[count].MinVoltage |= - ((mm_table->entries[count].vddc - VDDC_VDDCI_DELTA) * - VOLTAGE_SCALE) << VDDCI_SHIFT; - table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /*retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->VceLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for VCE engine clock", - return result); - - table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage); - } - return result; -} - -static int fiji_populate_smc_acp_level(struct pp_hwmgr *hwmgr, - SMU73_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - - table->AcpLevelCount = (uint8_t)(mm_table->count); - table->AcpBootLevel = 0; - - for (count = 0; count < table->AcpLevelCount; count++) { - table->AcpLevel[count].Frequency = mm_table->entries[count].aclk; - table->AcpLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - table->AcpLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->AcpLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->AcpLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for engine clock", return result); - - table->AcpLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->AcpLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->AcpLevel[count].MinVoltage); - } - return result; -} - -static int fiji_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU73_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t)(mm_table->count); - - for (count = 0; count < table->SamuLevelCount; count++) { - /* not sure whether we need evclk or not */ - table->SamuLevel[count].MinVoltage = 0; - table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; - table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - -
CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); - } - return result; -} - -static int fiji_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, - int32_t eng_clock, int32_t mem_clock, - struct SMU73_Discrete_MCArbDramTimingTableEntry *arb_regs) -{ - uint32_t dram_timing; - uint32_t dram_timing2; - uint32_t burstTime; - ULONG state, trrds, trrdl; - int result; - - result = atomctrl_set_engine_dram_timings_rv770(hwmgr, - eng_clock, mem_clock); - PP_ASSERT_WITH_CODE(result == 0, - "Error calling VBIOS to set DRAM_TIMING.", return result); - - dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); - dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); - burstTime = cgs_read_register(hwmgr->device, mmMC_ARB_BURST_TIME); - - state = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, STATE0); - trrds = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, TRRDS0); - trrdl = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, TRRDL0); - - arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dram_timing); - arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2); - arb_regs->McArbBurstTime = (uint8_t)burstTime; - arb_regs->TRRDS = (uint8_t)trrds; - arb_regs->TRRDL = (uint8_t)trrdl; - - return 0; -} - -static int fiji_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct SMU73_Discrete_MCArbDramTimingTable arb_regs; - uint32_t i, j; - int result = 0; - - for (i = 0; i < data->dpm_table.sclk_table.count; i++) { - for (j = 0; j < data->dpm_table.mclk_table.count; j++) { - result = fiji_populate_memory_timing_parameters(hwmgr, - data->dpm_table.sclk_table.dpm_levels[i].value, - data->dpm_table.mclk_table.dpm_levels[j].value, - &arb_regs.entries[i][j]); - if (result) - break; - } - } - - if (!result) - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.arb_table_start, - (uint8_t *)&arb_regs, - sizeof(SMU73_Discrete_MCArbDramTimingTable), - SMC_RAM_END); - return result; -} - -static int fiji_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - - table->UvdLevelCount = (uint8_t)(mm_table->count); - table->UvdBootLevel = 0; - - for (count = 0; count < table->UvdLevelCount; count++) { - table->UvdLevel[count].MinVoltage = 0; - table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk; - table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; - table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->UvdLevel[count].VclkFrequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for Vclk clock", return result); - - table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider; - - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, -
table->UvdLevel[count].DclkFrequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for Dclk clock", return result); - - table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); - - } - return result; -} - -static int fiji_populate_smc_boot_level(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_DpmTable *table) -{ - int result = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - table->GraphicsBootLevel = 0; - table->MemoryBootLevel = 0; - - /* find boot level from dpm table */ - result = phm_find_boot_level(&(data->dpm_table.sclk_table), - data->vbios_boot_state.sclk_bootup_value, - (uint32_t *)&(table->GraphicsBootLevel)); - - result = phm_find_boot_level(&(data->dpm_table.mclk_table), - data->vbios_boot_state.mclk_bootup_value, - (uint32_t *)&(table->MemoryBootLevel)); - - table->BootVddc = data->vbios_boot_state.vddc_bootup_value * - VOLTAGE_SCALE; - table->BootVddci = data->vbios_boot_state.vddci_bootup_value * - VOLTAGE_SCALE; - table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value * - VOLTAGE_SCALE; - - CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc); - CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci); - CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); - - return 0; -} - -static int fiji_populate_smc_initailial_state(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint8_t count, level; - - count = (uint8_t)(table_info->vdd_dep_on_sclk->count); - for (level = 0; level < count; level++) { - if (table_info->vdd_dep_on_sclk->entries[level].clk >= - data->vbios_boot_state.sclk_bootup_value) { - smu_data->smc_state_table.GraphicsBootLevel = level; - break; - } - } - - count = (uint8_t)(table_info->vdd_dep_on_mclk->count); - for (level = 0; level < count; level++) { - if (table_info->vdd_dep_on_mclk->entries[level].clk >= - data->vbios_boot_state.mclk_bootup_value) { - smu_data->smc_state_table.MemoryBootLevel = level; - break; - } - } - - return 0; -} - -static int fiji_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) -{ - uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks, - volt_with_cks, value; - uint16_t clock_freq_u16; - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2, - volt_offset = 0; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = - table_info->vdd_dep_on_sclk; - - stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; - - /* Read SMU_Eefuse to read and calculate RO and determine - * if the part is SS or FF. if RO >= 1660MHz, part is FF.
- */ - efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (146 * 4)); - efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (148 * 4)); - efuse &= 0xFF000000; - efuse = efuse >> 24; - efuse2 &= 0xF; - - if (efuse2 == 1) - ro = (2300 - 1350) * efuse / 255 + 1350; - else - ro = (2500 - 1000) * efuse / 255 + 1000; - - if (ro >= 1660) - type = 0; - else - type = 1; - - /* Populate Stretch amount */ - smu_data->smc_state_table.ClockStretcherAmount = stretch_amount; - - /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ - for (i = 0; i < sclk_table->count; i++) { - smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= - sclk_table->entries[i].cks_enable << i; - volt_without_cks = (uint32_t)((14041 * - (sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 / - (4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000))); - volt_with_cks = (uint32_t)((13946 * - (sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 / - (3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000))); - if (volt_without_cks >= volt_with_cks) - volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + - sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); - smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; - } - - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - STRETCH_ENABLE, 0x0); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x1); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - staticEnable, 0x1); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x0); - - /* Populate CKS Lookup Table */ - if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) - stretch_amount2 = 0; - else if (stretch_amount == 3 || stretch_amount == 4) - stretch_amount2 = 1; - else { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ClockStretcher); - PP_ASSERT_WITH_CODE(false, - "Stretch Amount in PPTable not supported\n", - return -EINVAL); - } - - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL); - value &= 0xFFC2FF87; - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq = - fiji_clock_stretcher_lookup_table[stretch_amount2][0]; - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq = - fiji_clock_stretcher_lookup_table[stretch_amount2][1]; - clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(smu_data->smc_state_table. - GraphicsLevel[smu_data->smc_state_table.GraphicsDpmLevelCount - 1]. - SclkFrequency) / 100); - if (fiji_clock_stretcher_lookup_table[stretch_amount2][0] < - clock_freq_u16 && - fiji_clock_stretcher_lookup_table[stretch_amount2][1] > - clock_freq_u16) { - /* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */ - value |= (fiji_clock_stretcher_lookup_table[stretch_amount2][3]) << 16; - /* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */ - value |= (fiji_clock_stretcher_lookup_table[stretch_amount2][2]) << 18; - /* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */ - value |= (fiji_clock_stretch_amount_conversion - [fiji_clock_stretcher_lookup_table[stretch_amount2][3]] - [stretch_amount]) << 3; - } - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. - CKS_LOOKUPTableEntry[0].minFreq); - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. 
- CKS_LOOKUPTableEntry[0].maxFreq); - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting = - fiji_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F; - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |= - (fiji_clock_stretcher_lookup_table[stretch_amount2][3]) << 7; - - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL, value); - - /* Populate DDT Lookup Table */ - for (i = 0; i < 4; i++) { - /* Assign the minimum and maximum VID stored - * in the last row of Clock Stretcher Voltage Table. - */ - smu_data->smc_state_table.ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].minVID = - (uint8_t) fiji_clock_stretcher_ddt_table[type][i][2]; - smu_data->smc_state_table.ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].maxVID = - (uint8_t) fiji_clock_stretcher_ddt_table[type][i][3]; - /* Loop through each SCLK and check the frequency - * to see if it lies within the frequency for clock stretcher. - */ - for (j = 0; j < smu_data->smc_state_table.GraphicsDpmLevelCount; j++) { - cks_setting = 0; - clock_freq = PP_SMC_TO_HOST_UL( - smu_data->smc_state_table.GraphicsLevel[j].SclkFrequency); - /* Check the allowed frequency against the sclk level[j]. - * Sclk's endianness has already been converted, - * and it's in 10Khz unit, - * as opposed to Data table, which is in Mhz unit. - */ - if (clock_freq >= - (fiji_clock_stretcher_ddt_table[type][i][0]) * 100) { - cks_setting |= 0x2; - if (clock_freq < - (fiji_clock_stretcher_ddt_table[type][i][1]) * 100) - cks_setting |= 0x1; - } - smu_data->smc_state_table.ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].setting |= cks_setting << (j * 2); - } - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table. - ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].setting); - } - - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); - value &= 0xFFFFFFFE; - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); - - return 0; -} - -/** -* Populates the SMC VRConfig field in DPM table. 
-* -* @param hwmgr the address of the hardware manager -* @param table the SMC DPM table structure to be populated -* @return always 0 -*/ -static int fiji_populate_vr_config(struct pp_hwmgr *hwmgr, - struct SMU73_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint16_t config; - - config = VR_MERGED_WITH_VDDC; - table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT); - - /* Set Vddc Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { - config = VR_SVI2_PLANE_1; - table->VRConfig |= config; - } else { - PP_ASSERT_WITH_CODE(false, - "VDDC should be on SVI2 control in merged mode!", - ); - } - /* Set Vddci Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { - config = VR_SVI2_PLANE_2; /* only in merged mode */ - table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); - } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { - config = VR_SMIO_PATTERN_1; - table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); - } else { - config = VR_STATIC_VOLTAGE; - table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); - } - /* Set Mvdd Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) { - config = VR_SVI2_PLANE_2; - table->VRConfig |= (config << VRCONF_MVDD_SHIFT); - } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { - config = VR_SMIO_PATTERN_2; - table->VRConfig |= (config << VRCONF_MVDD_SHIFT); - } else { - config = VR_STATIC_VOLTAGE; - table->VRConfig |= (config << VRCONF_MVDD_SHIFT); - } - - return 0; -} - -static int fiji_init_arb_table_index(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - uint32_t tmp; - int result; - - /* This is a read-modify-write on the first byte of the ARB table. - * The first byte in the SMU73_Discrete_MCArbDramTimingTable structure - * is the field 'current'. - * This solution is ugly, but we never write the whole table only - * individual fields in it. - * In reality this field should not be in that structure - * but in a soft register. - */ - result = smu7_read_smc_sram_dword(hwmgr, - smu_data->smu7_data.arb_table_start, &tmp, SMC_RAM_END); - - if (result) - return result; - - tmp &= 0x00FFFFFF; - tmp |= ((uint32_t)MC_CG_ARB_FREQ_F1) << 24; - - return smu7_write_smc_sram_dword(hwmgr, - smu_data->smu7_data.arb_table_start, tmp, SMC_RAM_END); -} - -static int fiji_save_default_power_profile(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct SMU73_Discrete_GraphicsLevel *levels = - data->smc_state_table.GraphicsLevel; - unsigned min_level = 1; - - hwmgr->default_gfx_power_profile.activity_threshold = - be16_to_cpu(levels[0].ActivityLevel); - hwmgr->default_gfx_power_profile.up_hyst = levels[0].UpHyst; - hwmgr->default_gfx_power_profile.down_hyst = levels[0].DownHyst; - hwmgr->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; - - hwmgr->default_compute_power_profile = hwmgr->default_gfx_power_profile; - hwmgr->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; - - /* Workaround compute SDMA instability: disable lowest SCLK - * DPM level. 
Optimize compute power profile: Use only highest - * 2 power levels (if more than 2 are available), Hysteresis: - * 0ms up, 5ms down - */ - if (data->smc_state_table.GraphicsDpmLevelCount > 2) - min_level = data->smc_state_table.GraphicsDpmLevelCount - 2; - else if (data->smc_state_table.GraphicsDpmLevelCount == 2) - min_level = 1; - else - min_level = 0; - hwmgr->default_compute_power_profile.min_sclk = - be32_to_cpu(levels[min_level].SclkFrequency); - hwmgr->default_compute_power_profile.up_hyst = 0; - hwmgr->default_compute_power_profile.down_hyst = 5; - - hwmgr->gfx_power_profile = hwmgr->default_gfx_power_profile; - hwmgr->compute_power_profile = hwmgr->default_compute_power_profile; - - return 0; -} - -static int fiji_setup_dpm_led_config(struct pp_hwmgr *hwmgr) -{ - pp_atomctrl_voltage_table param_led_dpm; - int result = 0; - u32 mask = 0; - - result = atomctrl_get_voltage_table_v3(hwmgr, - VOLTAGE_TYPE_LEDDPM, VOLTAGE_OBJ_GPIO_LUT, - &param_led_dpm); - if (result == 0) { - int i, j; - u32 tmp = param_led_dpm.mask_low; - - for (i = 0, j = 0; i < 32; i++) { - if (tmp & 1) { - mask |= (i << (8 * j)); - if (++j >= 3) - break; - } - tmp >>= 1; - } - } - if (mask) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_LedConfig, - mask); - return 0; -} - -/** -* Initializes the SMC table and uploads it -* -* @param hwmgr the address of the powerplay hardware manager. -* @param pInput the pointer to input data (PowerState) -* @return always 0 -*/ -int fiji_init_smc_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct SMU73_Discrete_DpmTable *table = &(smu_data->smc_state_table); - uint8_t i; - struct pp_atomctrl_gpio_pin_assignment gpio_pin; - - fiji_initialize_power_tune_defaults(hwmgr); - - if (SMU7_VOLTAGE_CONTROL_NONE != data->voltage_control) - fiji_populate_smc_voltage_tables(hwmgr, table); - - table->SystemFlags = 0; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StepVddc)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - - if (data->is_memory_gddr5) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; - - if (data->ulv_supported && table_info->us_ulv_voltage_offset) { - result = fiji_populate_ulv_state(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ULV state!", return result); - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixCG_ULV_PARAMETER, 0x40035); - } - - result = fiji_populate_smc_link_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Link Level!", return result); - - result = fiji_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Graphics Level!", return result); - - result = fiji_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Memory Level!", return result); - - result = fiji_populate_smc_acpi_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ACPI Level!", return result); - - result = fiji_populate_smc_vce_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize VCE Level!", return result); - - result = 
fiji_populate_smc_acp_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ACP Level!", return result); - - result = fiji_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize SAMU Level!", return result); - - /* Since only the initial state is completely set up at this point - * (the other states are just copies of the boot state) we only - * need to populate the ARB settings for the initial state. - */ - result = fiji_program_memory_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to Write ARB settings for the initial state.", return result); - - result = fiji_populate_smc_uvd_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize UVD Level!", return result); - - result = fiji_populate_smc_boot_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Boot Level!", return result); - - result = fiji_populate_smc_initailial_state(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Boot State!", return result); - - result = fiji_populate_bapm_parameters_in_dpm_table(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate BAPM Parameters!", return result); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ClockStretcher)) { - result = fiji_populate_clock_stretcher_data_table(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate Clock Stretcher Data Table!", - return result); - } - - table->GraphicsVoltageChangeEnable = 1; - table->GraphicsThermThrottleEnable = 1; - table->GraphicsInterval = 1; - table->VoltageInterval = 1; - table->ThermalInterval = 1; - table->TemperatureLimitHigh = - table_info->cac_dtp_table->usTargetOperatingTemp * - SMU7_Q88_FORMAT_CONVERSION_UNIT; - table->TemperatureLimitLow = - (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * - SMU7_Q88_FORMAT_CONVERSION_UNIT; - table->MemoryVoltageChangeEnable = 1; - table->MemoryInterval = 1; - table->VoltageResponseTime = 0; - table->PhaseResponseTime = 0; - table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; /* 0:Gen1 1:Gen2 2:Gen3*/ - table->PCIeGenInterval = 1; - table->VRConfig = 0; - - result = fiji_populate_vr_config(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate VRConfig setting!", return result); - - table->ThermGpio = 17; - table->SclkStepSize = 0x4000; - - if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID, &gpio_pin)) { - table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift; - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_RegulatorHot); - } else { - table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_RegulatorHot); - } - - if (atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID, - &gpio_pin)) { - table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift; - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition); - } else { - table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition); - } - - /* Thermal Output GPIO */ - if (atomctrl_get_pp_assign_pin(hwmgr, THERMAL_INT_OUTPUT_GPIO_PINID, - &gpio_pin)) { - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ThermalOutGPIO); - - table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift; - - /* For porlarity read GPIOPAD_A with assigned Gpio pin - * since VBIOS will program this register to 
set 'inactive state', - * driver can then determine 'active state' from this and - * program SMU with correct polarity - */ - table->ThermOutPolarity = (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) & - (1 << gpio_pin.uc_gpio_pin_bit_shift))) ? 1:0; - table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY; - - /* if required, combine VRHot/PCC with thermal out GPIO */ - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_RegulatorHot) && - phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_CombinePCCWithThermalSignal)) - table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT; - } else { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ThermalOutGPIO); - table->ThermOutGpio = 17; - table->ThermOutPolarity = 1; - table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE; - } - - for (i = 0; i < SMU73_MAX_ENTRIES_SMIO; i++) - table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); - - CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); - CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2); - CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); - CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); - CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); - - /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ - result = smu7_copy_bytes_to_smc(hwmgr, - smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, SystemFlags), - (uint8_t *)&(table->SystemFlags), - sizeof(SMU73_Discrete_DpmTable) - 3 * sizeof(SMU73_PIDController), - SMC_RAM_END); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to upload dpm data to SMC memory!", return result); - - result = fiji_init_arb_table_index(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to upload arb data to SMC memory!", return result); - - result = fiji_populate_pm_fuses(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate PM fuses to SMC memory!", return result); - - result = fiji_setup_dpm_led_config(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to setup dpm led config", return result); - - fiji_save_default_power_profile(hwmgr); - - return 0; -} - -/** -* Set up the fan table to control the fan using the SMC. -* @param hwmgr the address of the powerplay hardware manager. 
-* @param pInput the pointer to input data -* @param pOutput the pointer to output data -* @param pStorage the pointer to temporary storage -* @param Result the last failure code -* @return result from set temperature range routine -*/ -int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - - SMU73_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; - uint32_t duty100; - uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; - uint16_t fdo_min, slope1, slope2; - uint32_t reference_clock; - int res; - uint64_t tmp64; - - if (hwmgr->thermal_controller.fanInfo.bNoFan) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - if (smu_data->smu7_data.fan_table_start == 0) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, - CG_FDO_CTRL1, FMAX_DUTY100); - - if (duty100 == 0) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - tmp64 = hwmgr->thermal_controller.advanceFanControlParameters. - usPWMMin * duty100; - do_div(tmp64, 10000); - fdo_min = (uint16_t)tmp64; - - t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - - hwmgr->thermal_controller.advanceFanControlParameters.usTMin; - t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - - hwmgr->thermal_controller.advanceFanControlParameters.usTMed; - - pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; - pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; - - slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); - slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); - - fan_table.TempMin = cpu_to_be16((50 + hwmgr-> - thermal_controller.advanceFanControlParameters.usTMin) / 100); - fan_table.TempMed = cpu_to_be16((50 + hwmgr-> - thermal_controller.advanceFanControlParameters.usTMed) / 100); - fan_table.TempMax = cpu_to_be16((50 + hwmgr-> - thermal_controller.advanceFanControlParameters.usTMax) / 100); - - fan_table.Slope1 = cpu_to_be16(slope1); - fan_table.Slope2 = cpu_to_be16(slope2); - - fan_table.FdoMin = cpu_to_be16(fdo_min); - - fan_table.HystDown = cpu_to_be16(hwmgr-> - thermal_controller.advanceFanControlParameters.ucTHyst); - - fan_table.HystUp = cpu_to_be16(1); - - fan_table.HystSlope = cpu_to_be16(1); - - fan_table.TempRespLim = cpu_to_be16(5); - - reference_clock = smu7_get_xclk(hwmgr); - - fan_table.RefreshPeriod = cpu_to_be32((hwmgr-> - thermal_controller.advanceFanControlParameters.ulCycleDelay * - reference_clock) / 1600); - - fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); - - fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD( - hwmgr->device, CGS_IND_REG__SMC, - CG_MULT_THERMAL_CTRL, TEMP_SEL); - - res = smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.fan_table_start, - (uint8_t *)&fan_table, (uint32_t)sizeof(fan_table), - SMC_RAM_END); - - if (!res && hwmgr->thermal_controller. - advanceFanControlParameters.ucMinimumPWMLimit) - res = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetFanMinPwm, - hwmgr->thermal_controller. 
- advanceFanControlParameters.ucMinimumPWMLimit); - - if (!res && hwmgr->thermal_controller. - advanceFanControlParameters.ulMinFanSCLKAcousticLimit) - res = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetFanSclkTarget, - hwmgr->thermal_controller. - advanceFanControlParameters.ulMinFanSCLKAcousticLimit); - - if (res) - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - - return 0; -} - - -int fiji_thermal_avfs_enable(struct pp_hwmgr *hwmgr) -{ - int ret; - struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - - if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS) - return 0; - - ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs); - - if (!ret) - /* If this param is not changed, this function could fire unnecessarily */ - smu_data->avfs.avfs_btc_status = AVFS_BTC_COMPLETED_PREVIOUSLY; - - return ret; -} - -static int fiji_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (data->need_update_smu7_dpm_table & - (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) - return fiji_program_memory_timing_parameters(hwmgr); - - return 0; -} - -int fiji_update_sclk_threshold(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - - int result = 0; - uint32_t low_sclk_interrupt_threshold = 0; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; - low_sclk_interrupt_threshold = - data->low_sclk_interrupt_threshold; - - CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); - - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, - LowSclkInterruptThreshold), - (uint8_t *)&low_sclk_interrupt_threshold, - sizeof(uint32_t), - SMC_RAM_END); - } - result = fiji_program_mem_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE((result == 0), - "Failed to program memory timing parameters!", - ); - return result; -} - -uint32_t fiji_get_offsetof(uint32_t type, uint32_t member) -{ - switch (type) { - case SMU_SoftRegisters: - switch (member) { - case HandshakeDisables: - return offsetof(SMU73_SoftRegisters, HandshakeDisables); - case VoltageChangeTimeout: - return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout); - case AverageGraphicsActivity: - return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity); - case PreVBlankGap: - return offsetof(SMU73_SoftRegisters, PreVBlankGap); - case VBlankTimeout: - return offsetof(SMU73_SoftRegisters, VBlankTimeout); - case UcodeLoadStatus: - return offsetof(SMU73_SoftRegisters, UcodeLoadStatus); - } - case SMU_Discrete_DpmTable: - switch (member) { - case UvdBootLevel: - return offsetof(SMU73_Discrete_DpmTable, UvdBootLevel); - case VceBootLevel: - return offsetof(SMU73_Discrete_DpmTable, VceBootLevel); - case SamuBootLevel: - return offsetof(SMU73_Discrete_DpmTable, SamuBootLevel); - case LowSclkInterruptThreshold: - return offsetof(SMU73_Discrete_DpmTable, LowSclkInterruptThreshold); - } - } - pr_warn("can't get the offset of type %x member %x\n", type, member); - return 0; -} - -uint32_t fiji_get_mac_definition(uint32_t value) -{ - switch (value) { - case SMU_MAX_LEVELS_GRAPHICS: - return 
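/*
 * Editor's note (illustrative sketch, not part of the patch):
 * fiji_get_offsetof() maps an abstract (type, member) pair to an offsetof()
 * into the SMU73 firmware structures; callers such as
 * fiji_update_sclk_threshold() add the returned offset to dpm_table_start so
 * a single field of the table already resident in SMC RAM can be rewritten.
 * A minimal standalone analogue with a made-up structure:
 */
#include <stddef.h>
#include <stdint.h>

struct demo_dpm_table {
        uint32_t SystemFlags;
        uint32_t LowSclkInterruptThreshold;
};

static size_t demo_threshold_offset(void)
{
        /* dpm_table_start + this value is the SMC RAM address to patch */
        return offsetof(struct demo_dpm_table, LowSclkInterruptThreshold);
}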
SMU73_MAX_LEVELS_GRAPHICS; - case SMU_MAX_LEVELS_MEMORY: - return SMU73_MAX_LEVELS_MEMORY; - case SMU_MAX_LEVELS_LINK: - return SMU73_MAX_LEVELS_LINK; - case SMU_MAX_ENTRIES_SMIO: - return SMU73_MAX_ENTRIES_SMIO; - case SMU_MAX_LEVELS_VDDC: - return SMU73_MAX_LEVELS_VDDC; - case SMU_MAX_LEVELS_VDDGFX: - return SMU73_MAX_LEVELS_VDDGFX; - case SMU_MAX_LEVELS_VDDCI: - return SMU73_MAX_LEVELS_VDDCI; - case SMU_MAX_LEVELS_MVDD: - return SMU73_MAX_LEVELS_MVDD; - } - - pr_warn("can't get the mac of %x\n", value); - return 0; -} - - -static int fiji_update_uvd_smc_table(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - smu_data->smc_state_table.UvdBootLevel = 0; - if (table_info->mm_dep_table->count > 0) - smu_data->smc_state_table.UvdBootLevel = - (uint8_t) (table_info->mm_dep_table->count - 1); - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU73_Discrete_DpmTable, - UvdBootLevel); - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0x00FFFFFF; - mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_UVDDPM) || - phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_UVDDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); - return 0; -} - -static int fiji_update_vce_smc_table(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smu_data->smc_state_table.VceBootLevel = - (uint8_t) (table_info->mm_dep_table->count - 1); - else - smu_data->smc_state_table.VceBootLevel = 0; - - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, VceBootLevel); - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFF00FFFF; - mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_VCEDPM_SetEnabledMask, - (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); - return 0; -} - -static int fiji_update_samu_smc_table(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - - - smu_data->smc_state_table.SamuBootLevel = 0; - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, SamuBootLevel); - - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - 
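/*
 * Editor's note (illustrative sketch, not part of the patch): the UVD, VCE
 * and SAMU boot-level updates in this hunk all share one pattern - round the
 * member's byte offset down to its containing 32-bit word ("/= 4; *= 4"),
 * read that word back from SMC, and splice a single byte into the right lane
 * (bits 31:24 for UvdBootLevel, 23:16 for VceBootLevel, 7:0 for
 * SamuBootLevel) before writing it out again.  The splice step in isolation:
 */
#include <stdint.h>

static uint32_t splice_byte(uint32_t word, uint8_t value, unsigned int byte_lane)
{
        unsigned int shift = byte_lane * 8;     /* byte_lane 3, 2 and 0 in the cases above */

        word &= ~((uint32_t)0xff << shift);
        return word | ((uint32_t)value << shift);
}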
mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFFFFFF00; - mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); - return 0; -} - -int fiji_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) -{ - switch (type) { - case SMU_UVD_TABLE: - fiji_update_uvd_smc_table(hwmgr); - break; - case SMU_VCE_TABLE: - fiji_update_vce_smc_table(hwmgr); - break; - case SMU_SAMU_TABLE: - fiji_update_samu_smc_table(hwmgr); - break; - default: - break; - } - return 0; -} - - -/** -* Get the location of various tables inside the FW image. -* -* @param hwmgr the address of the powerplay hardware manager. -* @return always 0 -*/ -int fiji_process_firmware_header(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - uint32_t tmp; - int result; - bool error = false; - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU73_Firmware_Header, DpmTable), - &tmp, SMC_RAM_END); - - if (0 == result) - smu_data->smu7_data.dpm_table_start = tmp; - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU73_Firmware_Header, SoftRegisters), - &tmp, SMC_RAM_END); - - if (!result) { - data->soft_regs_start = tmp; - smu_data->smu7_data.soft_regs_start = tmp; - } - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU73_Firmware_Header, mcRegisterTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.mc_reg_table_start = tmp; - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU73_Firmware_Header, FanTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.fan_table_start = tmp; - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU73_Firmware_Header, mcArbDramTimingTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.arb_table_start = tmp; - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU73_Firmware_Header, Version), - &tmp, SMC_RAM_END); - - if (!result) - hwmgr->microcode_version_info.SMC = tmp; - - error |= (0 != result); - - return error ? 
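/*
 * Editor's note (illustrative sketch, not part of the patch):
 * fiji_process_firmware_header() reads a fixed set of 32-bit fields at
 * SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU73_Firmware_Header, <field>)
 * and folds each read status into one error flag (in the code as quoted, the
 * mcRegisterTable read is the one whose status is not OR-ed into "error").
 * The same walk expressed table-driven, with a hypothetical reader callback:
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef int (*sram_read_fn)(uint32_t addr, uint32_t *out);

static int read_header_fields(sram_read_fn read_dword, uint32_t header_base,
                              const uint32_t *field_offsets, uint32_t *values,
                              size_t n)
{
        bool error = false;
        size_t i;

        for (i = 0; i < n; i++)
                error |= (read_dword(header_base + field_offsets[i], &values[i]) != 0);

        return error ? -1 : 0;
}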
-1 : 0; -} - -int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) -{ - - /* Program additional LP registers - * that are no longer programmed by VBIOS - */ - cgs_write_register(hwmgr->device, mmMC_SEQ_RAS_TIMING_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_RAS_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_CAS_TIMING_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_CAS_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2)); - cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0)); - cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_TIMING_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_TIMING)); - - return 0; -} - -bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr) -{ - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; -} - -int fiji_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, - struct amd_pp_profile *request) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *) - (hwmgr->smu_backend); - struct SMU73_Discrete_GraphicsLevel *levels = - smu_data->smc_state_table.GraphicsLevel; - uint32_t array = smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, GraphicsLevel); - uint32_t array_size = sizeof(struct SMU73_Discrete_GraphicsLevel) * - SMU73_MAX_LEVELS_GRAPHICS; - uint32_t i; - - for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { - levels[i].ActivityLevel = - cpu_to_be16(request->activity_threshold); - levels[i].EnabledForActivity = 1; - levels[i].UpHyst = request->up_hyst; - levels[i].DownHyst = request->down_hyst; - } - - return smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, - array_size, SMC_RAM_END); -} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h deleted file mode 100644 index d9c72d992e30..000000000000 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
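/*
 * Editor's note (illustrative sketch, not part of the patch):
 * fiji_initialize_mc_reg_table() above simply copies a handful of MC_SEQ_*
 * timing registers into their *_LP shadows that the VBIOS no longer
 * programs.  The same idea written as a table-driven loop, with hypothetical
 * register-access callbacks rather than the cgs helpers:
 */
#include <stddef.h>
#include <stdint.h>

struct reg_pair {
        uint32_t src;   /* e.g. mmMC_SEQ_RAS_TIMING */
        uint32_t dst;   /* e.g. mmMC_SEQ_RAS_TIMING_LP */
};

static void mirror_registers(uint32_t (*rd)(uint32_t), void (*wr)(uint32_t, uint32_t),
                             const struct reg_pair *pairs, size_t n)
{
        size_t i;

        for (i = 0; i < n; i++)
                wr(pairs[i].dst, rd(pairs[i].src));
}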
- * - */ -#ifndef FIJI_SMC_H -#define FIJI_SMC_H - -#include "smumgr.h" -#include "smu73.h" - -struct fiji_pt_defaults { - uint8_t SviLoadLineEn; - uint8_t SviLoadLineVddC; - uint8_t TDC_VDDC_ThrottleReleaseLimitPerc; - uint8_t TDC_MAWt; - uint8_t TdcWaterfallCtl; - uint8_t DTEAmbientTempBase; -}; - -int fiji_populate_all_graphic_levels(struct pp_hwmgr *hwmgr); -int fiji_populate_all_memory_levels(struct pp_hwmgr *hwmgr); -int fiji_init_smc_table(struct pp_hwmgr *hwmgr); -int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr); -int fiji_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type); -int fiji_update_sclk_threshold(struct pp_hwmgr *hwmgr); -uint32_t fiji_get_offsetof(uint32_t type, uint32_t member); -uint32_t fiji_get_mac_definition(uint32_t value); -int fiji_process_firmware_header(struct pp_hwmgr *hwmgr); -int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr); -bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr); -int fiji_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, - struct amd_pp_profile *request); -int fiji_thermal_avfs_enable(struct pp_hwmgr *hwmgr); -#endif - diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 592a89aff12b..f572beff197f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -23,6 +23,7 @@ #include "pp_debug.h" #include "smumgr.h" +#include "smu7_dyn_defaults.h" #include "smu73.h" #include "smu_ucode_xfer_vi.h" #include "fiji_smumgr.h" @@ -37,14 +38,54 @@ #include "gca/gfx_8_0_d.h" #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" -#include "fiji_pwrvirus.h" -#include "fiji_smc.h" +#include "dce/dce_10_0_d.h" +#include "dce/dce_10_0_sh_mask.h" +#include "hardwaremanager.h" +#include "cgs_common.h" +#include "atombios.h" +#include "pppcielanes.h" +#include "hwmgr.h" +#include "smu7_hwmgr.h" + #define AVFS_EN_MSB 1568 #define AVFS_EN_LSB 1568 #define FIJI_SMC_SIZE 0x20000 +#define VOLTAGE_SCALE 4 +#define POWERTUNE_DEFAULT_SET_MAX 1 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 +#define VDDC_VDDCI_DELTA 300 +#define MC_CG_ARB_FREQ_F1 0x0b + +/* [2.5%,~2.5%] Clock stretched is multiple of 2.5% vs + * not and [Fmin, Fmax, LDO_REFSEL, USE_FOR_LOW_FREQ] + */ +static const uint16_t fiji_clock_stretcher_lookup_table[2][4] = { + {600, 1050, 3, 0}, {600, 1050, 6, 1} }; + +/* [FF, SS] type, [] 4 voltage ranges, and + * [Floor Freq, Boundary Freq, VID min , VID max] + */ +static const uint32_t fiji_clock_stretcher_ddt_table[2][4][4] = { + { {265, 529, 120, 128}, {325, 650, 96, 119}, {430, 860, 32, 95}, {0, 0, 0, 31} }, + { {275, 550, 104, 112}, {319, 638, 96, 103}, {360, 720, 64, 95}, {384, 768, 32, 63} } }; + +/* [Use_For_Low_freq] value, [0%, 5%, 10%, 7.14%, 14.28%, 20%] + * (coming from PWR_CKS_CNTL.stretch_amount reg spec) + */ +static const uint8_t fiji_clock_stretch_amount_conversion[2][6] = { + {0, 1, 3, 2, 4, 5}, {0, 2, 4, 5, 6, 5} }; + +static const struct fiji_pt_defaults fiji_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { + /*sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc */ + {1, 0xF, 0xFD, + /* TDC_MAWt, TdcWaterfallCtl, DTEAmbientTempBase */ + 0x19, 5, 45} +}; + static const struct SMU73_Discrete_GraphicsLevel avfs_graphics_level[8] = { /* Min Sclk pcie DeepSleep Activity CgSpll CgSpll spllSpread SpllSpread CcPwr CcPwr Sclk Display Enabled Enabled Voltage Power */ /* Voltage, Frequency, DpmLevel, DivId, Level, FuncCntl3, FuncCntl4, 
Spectrum, Spectrum2, DynRm, DynRm1 Did, Watermark, ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */ @@ -159,46 +200,6 @@ static int fiji_start_smu_in_non_protection_mode(struct pp_hwmgr *hwmgr) return result; } -static void execute_pwr_table(struct pp_hwmgr *hwmgr, const PWR_Command_Table *pvirus, int size) -{ - int i; - uint32_t reg, data; - - for (i = 0; i < size; i++) { - reg = pvirus->reg; - data = pvirus->data; - if (reg != 0xffffffff) - cgs_write_register(hwmgr->device, reg, data); - else - break; - pvirus++; - } -} - -static void execute_pwr_dfy_table(struct pp_hwmgr *hwmgr, const PWR_DFY_Section *section) -{ - int i; - cgs_write_register(hwmgr->device, mmCP_DFY_CNTL, section->dfy_cntl); - cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_HI, section->dfy_addr_hi); - cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_LO, section->dfy_addr_lo); - for (i = 0; i < section->dfy_size; i++) - cgs_write_register(hwmgr->device, mmCP_DFY_DATA_0, section->dfy_data[i]); -} - -static int fiji_setup_pwr_virus(struct pp_hwmgr *hwmgr) -{ - execute_pwr_table(hwmgr, PwrVirusTable_pre, ARRAY_SIZE(PwrVirusTable_pre)); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section1); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section2); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section3); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section4); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section5); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section6); - execute_pwr_table(hwmgr, PwrVirusTable_post, ARRAY_SIZE(PwrVirusTable_post)); - - return 0; -} - static int fiji_start_avfs_btc(struct pp_hwmgr *hwmgr) { int result = 0; @@ -277,7 +278,7 @@ static int fiji_avfs_event_mgr(struct pp_hwmgr *hwmgr, bool smu_started) " table over to SMU", return -EINVAL;); smu_data->avfs.avfs_btc_status = AVFS_BTC_VIRUS_FAIL; - PP_ASSERT_WITH_CODE(0 == fiji_setup_pwr_virus(hwmgr), + PP_ASSERT_WITH_CODE(0 == smu7_setup_pwr_virus(hwmgr), "[AVFS][fiji_avfs_event_mgr] Could not setup " "Pwr Virus for AVFS ", return -EINVAL;); @@ -365,13 +366,6 @@ static bool fiji_is_hw_avfs_present(struct pp_hwmgr *hwmgr) return false; } -/** -* Write a 32bit value to the SMC SRAM space. -* ALL PARAMETERS ARE IN HOST BYTE ORDER. -* @param smumgr the address of the powerplay hardware manager. -* @param smc_addr the address in the SMC RAM to access. -* @param value to write to the SMC SRAM. 
-*/ static int fiji_smu_init(struct pp_hwmgr *hwmgr) { int i; @@ -393,6 +387,2334 @@ static int fiji_smu_init(struct pp_hwmgr *hwmgr) return 0; } +static int fiji_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table, + uint32_t clock, uint32_t *voltage, uint32_t *mvdd) +{ + uint32_t i; + uint16_t vddci; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + *voltage = *mvdd = 0; + + + /* clock - voltage dependency table is empty table */ + if (dep_table->count == 0) + return -EINVAL; + + for (i = 0; i < dep_table->count; i++) { + /* find first sclk bigger than request */ + if (dep_table->entries[i].clk >= clock) { + *voltage |= (dep_table->entries[i].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i].vddci) + *voltage |= (dep_table->entries[i].vddci * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else { + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i].vddc - + VDDC_VDDCI_DELTA)); + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + } + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i].mvdd * + VOLTAGE_SCALE; + + *voltage |= 1 << PHASES_SHIFT; + return 0; + } + } + + /* sclk is bigger than max sclk in the dependence table */ + *voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i-1].vddci) { + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i].vddc - + VDDC_VDDCI_DELTA)); + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + } + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE; + + return 0; +} + + +static uint16_t scale_fan_gain_settings(uint16_t raw_setting) +{ + uint32_t tmp; + tmp = raw_setting * 4096 / 100; + return (uint16_t)tmp; +} + +static void get_scl_sda_value(uint8_t line, uint8_t *scl, uint8_t *sda) +{ + switch (line) { + case SMU7_I2CLineID_DDC1: + *scl = SMU7_I2C_DDC1CLK; + *sda = SMU7_I2C_DDC1DATA; + break; + case SMU7_I2CLineID_DDC2: + *scl = SMU7_I2C_DDC2CLK; + *sda = SMU7_I2C_DDC2DATA; + break; + case SMU7_I2CLineID_DDC3: + *scl = SMU7_I2C_DDC3CLK; + *sda = SMU7_I2C_DDC3DATA; + break; + case SMU7_I2CLineID_DDC4: + *scl = SMU7_I2C_DDC4CLK; + *sda = SMU7_I2C_DDC4DATA; + break; + case SMU7_I2CLineID_DDC5: + *scl = SMU7_I2C_DDC5CLK; + *sda = SMU7_I2C_DDC5DATA; + break; + case SMU7_I2CLineID_DDC6: + *scl = SMU7_I2C_DDC6CLK; + *sda = SMU7_I2C_DDC6DATA; + break; + case SMU7_I2CLineID_SCLSDA: + *scl = SMU7_I2C_SCL; + *sda = SMU7_I2C_SDA; + break; + case SMU7_I2CLineID_DDCVGA: + *scl = SMU7_I2C_DDCVGACLK; + *sda = SMU7_I2C_DDCVGADATA; + break; + default: + *scl = 0; + *sda = 0; + break; + } +} + +static void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + 
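/*
 * Editor's note (illustrative sketch, not part of the patch):
 * fiji_get_dependency_volt_by_clk() above scans the clock/voltage dependency
 * table for the first entry whose clock is >= the requested clock and falls
 * back to the last entry when the request is above the whole table.  The
 * search itself, stripped of the VDDCI/MVDD special cases (the real code
 * rejects an empty table before getting here):
 */
#include <stddef.h>
#include <stdint.h>

struct clk_volt {
        uint32_t clk;
        uint16_t vddc;
};

static uint16_t lookup_vddc(const struct clk_volt *tbl, size_t count, uint32_t clock)
{
        size_t i;

        for (i = 0; i < count; i++)
                if (tbl[i].clk >= clock)
                        return tbl[i].vddc;

        return tbl[count - 1].vddc;     /* above the table: use the highest entry */
}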
+ if (table_info && + table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX && + table_info->cac_dtp_table->usPowerTuneDataSetID) + smu_data->power_tune_defaults = + &fiji_power_tune_data_set_array + [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; + else + smu_data->power_tune_defaults = &fiji_power_tune_data_set_array[0]; + +} + +static int fiji_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) +{ + + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; + + SMU73_Discrete_DpmTable *dpm_table = &(smu_data->smc_state_table); + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; + struct pp_advance_fan_control_parameters *fan_table = + &hwmgr->thermal_controller.advanceFanControlParameters; + uint8_t uc_scl, uc_sda; + + /* TDP number of fraction bits are changed from 8 to 7 for Fiji + * as requested by SMC team + */ + dpm_table->DefaultTdp = PP_HOST_TO_SMC_US( + (uint16_t)(cac_dtp_table->usTDP * 128)); + dpm_table->TargetTdp = PP_HOST_TO_SMC_US( + (uint16_t)(cac_dtp_table->usTDP * 128)); + + PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, + "Target Operating Temp is out of Range!", + ); + + dpm_table->GpuTjMax = (uint8_t)(cac_dtp_table->usTargetOperatingTemp); + dpm_table->GpuTjHyst = 8; + + dpm_table->DTEAmbientTempBase = defaults->DTEAmbientTempBase; + + /* The following are for new Fiji Multi-input fan/thermal control */ + dpm_table->TemperatureLimitEdge = PP_HOST_TO_SMC_US( + cac_dtp_table->usTargetOperatingTemp * 256); + dpm_table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitHotspot * 256); + dpm_table->TemperatureLimitLiquid1 = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitLiquid1 * 256); + dpm_table->TemperatureLimitLiquid2 = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitLiquid2 * 256); + dpm_table->TemperatureLimitVrVddc = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitVrVddc * 256); + dpm_table->TemperatureLimitVrMvdd = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitVrMvdd * 256); + dpm_table->TemperatureLimitPlx = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitPlx * 256); + + dpm_table->FanGainEdge = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainEdge)); + dpm_table->FanGainHotspot = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainHotspot)); + dpm_table->FanGainLiquid = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainLiquid)); + dpm_table->FanGainVrVddc = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainVrVddc)); + dpm_table->FanGainVrMvdd = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainVrMvdd)); + dpm_table->FanGainPlx = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainPlx)); + dpm_table->FanGainHbm = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainHbm)); + + dpm_table->Liquid1_I2C_address = cac_dtp_table->ucLiquid1_I2C_address; + dpm_table->Liquid2_I2C_address = cac_dtp_table->ucLiquid2_I2C_address; + dpm_table->Vr_I2C_address = cac_dtp_table->ucVr_I2C_address; + dpm_table->Plx_I2C_address = cac_dtp_table->ucPlx_I2C_address; + + get_scl_sda_value(cac_dtp_table->ucLiquid_I2C_Line, &uc_scl, &uc_sda); + dpm_table->Liquid_I2C_LineSCL = uc_scl; + dpm_table->Liquid_I2C_LineSDA = uc_sda; + + 
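/*
 * Editor's note (illustrative sketch, not part of the patch): the BAPM
 * fields populated above are simple fixed-point conversions - TDP with 7
 * fraction bits on Fiji (hence "* 128", per the in-code comment),
 * temperature limits with what appears to be 8 fraction bits ("* 256"), and
 * the fan gains rescaled so that 100 maps to 4096 via
 * scale_fan_gain_settings().  Standalone helpers with made-up names:
 */
#include <stdint.h>

static uint16_t tdp_to_fixed_8_7(uint16_t tdp)   { return (uint16_t)(tdp * 128); }
static uint16_t temp_to_fixed_8_8(uint16_t temp) { return (uint16_t)(temp * 256); }
static uint16_t fan_gain_scale(uint16_t raw)     { return (uint16_t)((uint32_t)raw * 4096 / 100); }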
get_scl_sda_value(cac_dtp_table->ucVr_I2C_Line, &uc_scl, &uc_sda); + dpm_table->Vr_I2C_LineSCL = uc_scl; + dpm_table->Vr_I2C_LineSDA = uc_sda; + + get_scl_sda_value(cac_dtp_table->ucPlx_I2C_Line, &uc_scl, &uc_sda); + dpm_table->Plx_I2C_LineSCL = uc_scl; + dpm_table->Plx_I2C_LineSDA = uc_sda; + + return 0; +} + + +static int fiji_populate_svi_load_line(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; + + smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn; + smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC; + smu_data->power_tune_table.SviLoadLineTrimVddC = 3; + smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + + +static int fiji_populate_tdc_limit(struct pp_hwmgr *hwmgr) +{ + uint16_t tdc_limit; + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; + + /* TDC number of fraction bits are changed from 8 to 7 + * for Fiji as requested by SMC team + */ + tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128); + smu_data->power_tune_table.TDC_VDDC_PkgLimit = + CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); + smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + defaults->TDC_VDDC_ThrottleReleaseLimitPerc; + smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt; + + return 0; +} + +static int fiji_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + const struct fiji_pt_defaults *defaults = smu_data->power_tune_defaults; + uint32_t temp; + + if (smu7_read_smc_sram_dword(hwmgr, + fuse_table_offset + + offsetof(SMU73_Discrete_PmFuses, TdcWaterfallCtl), + (uint32_t *)&temp, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", + return -EINVAL); + else { + smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl; + smu_data->power_tune_table.LPMLTemperatureMin = + (uint8_t)((temp >> 16) & 0xff); + smu_data->power_tune_table.LPMLTemperatureMax = + (uint8_t)((temp >> 8) & 0xff); + smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff); + } + return 0; +} + +static int fiji_populate_temperature_scaler(struct pp_hwmgr *hwmgr) +{ + int i; + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; + + return 0; +} + +static int fiji_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + + if ((hwmgr->thermal_controller.advanceFanControlParameters. + usFanOutputSensitivity & (1 << 15)) || + 0 == hwmgr->thermal_controller.advanceFanControlParameters. + usFanOutputSensitivity) + hwmgr->thermal_controller.advanceFanControlParameters. + usFanOutputSensitivity = hwmgr->thermal_controller. + advanceFanControlParameters.usDefaultFanOutputSensitivity; + + smu_data->power_tune_table.FuzzyFan_PwmSetDelta = + PP_HOST_TO_SMC_US(hwmgr->thermal_controller. 
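/*
 * Editor's note (illustrative sketch, not part of the patch):
 * fiji_populate_dw8() above reads a single PmFuses dword back from SMC RAM
 * and peels individual byte fields out of it (bits 23:16, 15:8 and 7:0),
 * while TdcWaterfallCtl itself still comes from the defaults table.  The
 * unpacking step in isolation:
 */
#include <stdint.h>

struct dw8_fields {
        uint8_t lpml_temp_min;
        uint8_t lpml_temp_max;
        uint8_t reserved;
};

static struct dw8_fields unpack_dw8(uint32_t fuse_word)
{
        struct dw8_fields f = {
                .lpml_temp_min = (uint8_t)((fuse_word >> 16) & 0xff),
                .lpml_temp_max = (uint8_t)((fuse_word >> 8) & 0xff),
                .reserved      = (uint8_t)(fuse_word & 0xff),
        };
        return f;
}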
+ advanceFanControlParameters.usFanOutputSensitivity); + return 0; +} + +static int fiji_populate_gnb_lpml(struct pp_hwmgr *hwmgr) +{ + int i; + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.GnbLPML[i] = 0; + + return 0; +} + +static int fiji_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint16_t HiSidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; + uint16_t LoSidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; + struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; + + HiSidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); + LoSidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); + + smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = + CONVERT_FROM_HOST_TO_SMC_US(HiSidd); + smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = + CONVERT_FROM_HOST_TO_SMC_US(LoSidd); + + return 0; +} + +static int fiji_populate_pm_fuses(struct pp_hwmgr *hwmgr) +{ + uint32_t pm_fuse_table_offset; + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + if (smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU73_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to get pm_fuse_table_offset Failed!", + return -EINVAL); + + /* DW6 */ + if (fiji_populate_svi_load_line(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate SviLoadLine Failed!", + return -EINVAL); + /* DW7 */ + if (fiji_populate_tdc_limit(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TDCLimit Failed!", return -EINVAL); + /* DW8 */ + if (fiji_populate_dw8(hwmgr, pm_fuse_table_offset)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TdcWaterfallCtl, " + "LPMLTemperature Min and Max Failed!", + return -EINVAL); + + /* DW9-DW12 */ + if (0 != fiji_populate_temperature_scaler(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate LPMLTemperatureScaler Failed!", + return -EINVAL); + + /* DW13-DW14 */ + if (fiji_populate_fuzzy_fan(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate Fuzzy Fan Control parameters Failed!", + return -EINVAL); + + /* DW15-DW18 */ + if (fiji_populate_gnb_lpml(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Failed!", + return -EINVAL); + + /* DW20 */ + if (fiji_populate_bapm_vddc_base_leakage_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate BapmVddCBaseLeakage Hi and Lo " + "Sidd Failed!", return -EINVAL); + + if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, + (uint8_t *)&smu_data->power_tune_table, + sizeof(struct SMU73_Discrete_PmFuses), SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to download PmFuseTable Failed!", + return -EINVAL); + } + return 0; +} + +static int fiji_populate_cac_table(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_DpmTable *table) +{ + uint32_t count; + uint8_t index; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_voltage_lookup_table *lookup_table = + table_info->vddc_lookup_table; + /* tables is 
already swapped, so in order to use the value from it, + * we need to swap it back. + * We are populating vddc CAC data to BapmVddc table + * in split and merged mode + */ + + for (count = 0; count < lookup_table->count; count++) { + index = phm_get_voltage_index(lookup_table, + data->vddc_voltage_table.entries[count].value); + table->BapmVddcVidLoSidd[count] = + convert_to_vid(lookup_table->entries[index].us_cac_low); + table->BapmVddcVidHiSidd[count] = + convert_to_vid(lookup_table->entries[index].us_cac_high); + } + + return 0; +} + +static int fiji_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_DpmTable *table) +{ + int result; + + result = fiji_populate_cac_table(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "can not populate CAC voltage tables to SMC", + return -EINVAL); + + return 0; +} + +static int fiji_populate_ulv_level(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_Ulv *state) +{ + int result = 0; + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; + state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * + VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + + state->VddcPhase = 1; + + if (!result) { + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); + } + return result; +} + +static int fiji_populate_ulv_state(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_DpmTable *table) +{ + return fiji_populate_ulv_level(hwmgr, &table->Ulv); +} + +static int fiji_populate_smc_link_level(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + int i; + + /* Index (dpm_table->pcie_speed_table.count) + * is reserved for PCIE boot level. 
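/*
 * Editor's note (illustrative sketch, not part of the patch): with
 * VOLTAGE_VID_OFFSET_SCALE1 = 625 and VOLTAGE_VID_OFFSET_SCALE2 = 100 as
 * defined earlier in this file, the VddcOffsetVid computation in
 * fiji_populate_ulv_level() amounts to dividing the ULV voltage offset
 * (apparently in millivolts) by 6.25 mV per VID step, in integer math:
 */
#include <stdint.h>

static uint8_t ulv_offset_to_vid(uint16_t offset_mv)
{
        return (uint8_t)((uint32_t)offset_mv * 100 / 625);      /* offset / 6.25 mV per step */
}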
*/ + for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width( + dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = 1; + table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff); + table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5); + table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30); + } + + smu_data->smc_state_table.LinkLevelCount = + (uint8_t)dpm_table->pcie_speed_table.count; + data->dpm_level_enable_mask.pcie_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); + + return 0; +} + +static int fiji_calculate_sclk_params(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU73_Discrete_GraphicsLevel *sclk) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct pp_atomctrl_clock_dividers_vi dividers; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + uint32_t spll_func_cntl_4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; + uint32_t cg_spll_spread_spectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + uint32_t cg_spll_spread_spectrum_2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + uint32_t ref_clock; + uint32_t ref_divider; + uint32_t fbdiv; + int result; + + /* get the engine clock dividers for this clock value */ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, clock, &dividers); + + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", + return result); + + /* To get FBDIV we need to multiply this by 16384 and divide it by Fref.
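/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * spread-spectrum setup just below computes CLK_S and CLK_V exactly as the
 * in-code comments describe - the percentage is in 0.01% units and the rate
 * in kHz.  The arithmetic on its own, with hypothetical helper names:
 */
#include <stdint.h>

static uint32_t spll_clk_s(uint32_t ref_clock, uint32_t ref_divider, uint32_t ss_rate_khz)
{
        /* reference_clock * 10 / (REFDIV + 1) / rate / 2 == ref_clock * 5 / (ref_divider * rate) */
        return ref_clock * 5 / (ref_divider * ss_rate_khz);
}

static uint32_t spll_clk_v(uint32_t ss_percentage, uint32_t fbdiv, uint32_t clk_s)
{
        /* clkv = 2 * D * fbdiv / NS, with D given in 0.01% units */
        return 4 * ss_percentage * fbdiv / (clk_s * 10000);
}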
*/ + ref_clock = atomctrl_get_reference_clock(hwmgr); + ref_divider = 1 + dividers.uc_pll_ref_div; + + /* low 14 bits is fraction and high 12 bits is divider */ + fbdiv = dividers.ul_fb_div.ul_fb_divider & 0x3FFFFFF; + + /* SPLL_FUNC_CNTL setup */ + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, + SPLL_REF_DIV, dividers.uc_pll_ref_div); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, + SPLL_PDIV_A, dividers.uc_pll_post_div); + + /* SPLL_FUNC_CNTL_3 setup*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, CG_SPLL_FUNC_CNTL_3, + SPLL_FB_DIV, fbdiv); + + /* set to use fractional accumulation*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, CG_SPLL_FUNC_CNTL_3, + SPLL_DITHEN, 1); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EngineSpreadSpectrumSupport)) { + struct pp_atomctrl_internal_ss_info ssInfo; + + uint32_t vco_freq = clock * dividers.uc_pll_post_div; + if (!atomctrl_get_engine_clock_spread_spectrum(hwmgr, + vco_freq, &ssInfo)) { + /* + * ss_info.speed_spectrum_percentage -- in unit of 0.01% + * ss_info.speed_spectrum_rate -- in unit of khz + * + * clks = reference_clock * 10 / (REFDIV + 1) / speed_spectrum_rate / 2 + */ + uint32_t clk_s = ref_clock * 5 / + (ref_divider * ssInfo.speed_spectrum_rate); + /* clkv = 2 * D * fbdiv / NS */ + uint32_t clk_v = 4 * ssInfo.speed_spectrum_percentage * + fbdiv / (clk_s * 10000); + + cg_spll_spread_spectrum = PHM_SET_FIELD(cg_spll_spread_spectrum, + CG_SPLL_SPREAD_SPECTRUM, CLKS, clk_s); + cg_spll_spread_spectrum = PHM_SET_FIELD(cg_spll_spread_spectrum, + CG_SPLL_SPREAD_SPECTRUM, SSEN, 1); + cg_spll_spread_spectrum_2 = PHM_SET_FIELD(cg_spll_spread_spectrum_2, + CG_SPLL_SPREAD_SPECTRUM_2, CLKV, clk_v); + } + } + + sclk->SclkFrequency = clock; + sclk->CgSpllFuncCntl3 = spll_func_cntl_3; + sclk->CgSpllFuncCntl4 = spll_func_cntl_4; + sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; + sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; + sclk->SclkDid = (uint8_t)dividers.pll_post_divider; + + return 0; +} + +static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr, + uint32_t clock, uint16_t sclk_al_threshold, + struct SMU73_Discrete_GraphicsLevel *level) +{ + int result; + /* PP_Clocks minClocks; */ + uint32_t threshold, mvdd; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + result = fiji_calculate_sclk_params(hwmgr, clock, level); + + /* populate graphics levels */ + result = fiji_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, clock, + (uint32_t *)(&level->MinVoltage), &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "can not find VDDC voltage value for " + "VDDC engine clock dependency table", + return result); + + level->SclkFrequency = clock; + level->ActivityLevel = sclk_al_threshold; + level->CcPwrDynRm = 0; + level->CcPwrDynRm1 = 0; + level->EnabledForActivity = 0; + level->EnabledForThrottle = 1; + level->UpHyst = 10; + level->DownHyst = 0; + level->VoltageDownHyst = 0; + level->PowerThrottle = 0; + + threshold = clock * data->fast_watermark_threshold / 100; + + data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) + level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock, + hwmgr->display_config.min_core_set_clock_in_sr); + + + /* Default to slow, highest DPM level 
will be + * set to PPSMC_DISPLAY_WATERMARK_LOW later. + */ + level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage); + CONVERT_FROM_HOST_TO_SMC_UL(level->SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(level->CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(level->CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(level->SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(level->SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1); + + return 0; +} + +static int fiji_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + + struct smu7_dpm_table *dpm_table = &data->dpm_table; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; + uint8_t pcie_entry_cnt = (uint8_t) data->dpm_table.pcie_speed_table.count; + int result = 0; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU73_Discrete_GraphicsLevel) * + SMU73_MAX_LEVELS_GRAPHICS; + struct SMU73_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t i, max_entry; + uint8_t hightest_pcie_level_enabled = 0, + lowest_pcie_level_enabled = 0, + mid_pcie_level_enabled = 0, + count = 0; + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + result = fiji_populate_single_graphic_level(hwmgr, + dpm_table->sclk_table.dpm_levels[i].value, + (uint16_t)smu_data->activity_target[i], + &levels[i]); + if (result) + return result; + + /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ + if (i > 1) + levels[i].DeepSleepDivId = 0; + } + + /* Only enable level 0 for now.*/ + levels[0].EnabledForActivity = 1; + + /* set highest level watermark to high */ + levels[dpm_table->sclk_table.count - 1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + smu_data->smc_state_table.GraphicsDpmLevelCount = + (uint8_t)dpm_table->sclk_table.count; + data->dpm_level_enable_mask.sclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + if (pcie_table != NULL) { + PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt), + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + max_entry = pcie_entry_cnt - 1; + for (i = 0; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = + (uint8_t) ((i < max_entry) ? i : max_entry); + } else { + while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (hightest_pcie_level_enabled + 1))) != 0)) + hightest_pcie_level_enabled++; + + while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << lowest_pcie_level_enabled)) == 0)) + lowest_pcie_level_enabled++; + + while ((count < hightest_pcie_level_enabled) && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) + count++; + + mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) < + hightest_pcie_level_enabled ? 
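/*
 * Editor's note (illustrative sketch, not part of the patch): the while
 * loops above scan pcie_dpm_enable_mask - one finds the lowest set bit, the
 * other walks upward from level 0 while the next level's bit is also set
 * (so it effectively assumes the enabled levels form one contiguous run).
 * A standalone restatement of the two scans for a non-empty mask, mirroring
 * the original bounds behaviour rather than hardening it:
 */
#include <stdint.h>

static unsigned int lowest_enabled_level(uint32_t mask)
{
        unsigned int level = 0;

        while (mask && (mask & (1u << level)) == 0)
                level++;
        return level;
}

static unsigned int top_of_enabled_run(uint32_t mask)
{
        unsigned int level = 0;

        while (mask && (mask & (1u << (level + 1))) != 0)
                level++;
        return level;
}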
+ (lowest_pcie_level_enabled + 1 + count) : + hightest_pcie_level_enabled; + + /* set pcieDpmLevel to hightest_pcie_level_enabled */ + for (i = 2; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = hightest_pcie_level_enabled; + + /* set pcieDpmLevel to lowest_pcie_level_enabled */ + levels[0].pcieDpmLevel = lowest_pcie_level_enabled; + + /* set pcieDpmLevel to mid_pcie_level_enabled */ + levels[1].pcieDpmLevel = mid_pcie_level_enabled; + } + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + + +/** + * MCLK Frequency Ratio + * SEQ_CG_RESP Bit[31:24] - 0x0 + * Bit[27:24] \96 DDR3 Frequency ratio + * 0x0 <= 100MHz, 450 < 0x8 <= 500MHz + * 100 < 0x1 <= 150MHz, 500 < 0x9 <= 550MHz + * 150 < 0x2 <= 200MHz, 550 < 0xA <= 600MHz + * 200 < 0x3 <= 250MHz, 600 < 0xB <= 650MHz + * 250 < 0x4 <= 300MHz, 650 < 0xC <= 700MHz + * 300 < 0x5 <= 350MHz, 700 < 0xD <= 750MHz + * 350 < 0x6 <= 400MHz, 750 < 0xE <= 800MHz + * 400 < 0x7 <= 450MHz, 800 < 0xF + */ +static uint8_t fiji_get_mclk_frequency_ratio(uint32_t mem_clock) +{ + if (mem_clock <= 10000) + return 0x0; + if (mem_clock <= 15000) + return 0x1; + if (mem_clock <= 20000) + return 0x2; + if (mem_clock <= 25000) + return 0x3; + if (mem_clock <= 30000) + return 0x4; + if (mem_clock <= 35000) + return 0x5; + if (mem_clock <= 40000) + return 0x6; + if (mem_clock <= 45000) + return 0x7; + if (mem_clock <= 50000) + return 0x8; + if (mem_clock <= 55000) + return 0x9; + if (mem_clock <= 60000) + return 0xa; + if (mem_clock <= 65000) + return 0xb; + if (mem_clock <= 70000) + return 0xc; + if (mem_clock <= 75000) + return 0xd; + if (mem_clock <= 80000) + return 0xe; + /* mem_clock > 800MHz */ + return 0xf; +} + +static int fiji_calculate_mclk_params(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU73_Discrete_MemoryLevel *mclk) +{ + struct pp_atomctrl_memory_clock_param mem_param; + int result; + + result = atomctrl_get_memory_pll_dividers_vi(hwmgr, clock, &mem_param); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to get Memory PLL Dividers.", + ); + + /* Save the result data to outpupt memory level structure */ + mclk->MclkFrequency = clock; + mclk->MclkDivider = (uint8_t)mem_param.mpll_post_divider; + mclk->FreqRange = fiji_get_mclk_frequency_ratio(clock); + + return result; +} + +static int fiji_populate_single_memory_level(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU73_Discrete_MemoryLevel *mem_level) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + int result = 0; + uint32_t mclk_stutter_mode_threshold = 60000; + + if (table_info->vdd_dep_on_mclk) { + result = fiji_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, clock, + (uint32_t *)(&mem_level->MinVoltage), &mem_level->MinMvdd); + PP_ASSERT_WITH_CODE((0 == result), + "can not find MinVddc voltage value from memory " + "VDDC voltage dependency table", return result); + } + + mem_level->EnabledForThrottle = 1; + mem_level->EnabledForActivity = 0; + mem_level->UpHyst = 0; + mem_level->DownHyst = 100; + mem_level->VoltageDownHyst = 0; + mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + mem_level->StutterEnable = false; + + mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + /* enable stutter mode if all the follow condition applied + * 
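/*
 * Editor's note (illustrative sketch, not part of the patch):
 * fiji_get_mclk_frequency_ratio() above buckets the memory clock (in 10 kHz
 * units, so 10000 == 100 MHz per the table in the comment) into 50 MHz
 * steps.  The if-ladder collapses to one expression; a standalone
 * equivalent:
 */
#include <stdint.h>

static uint8_t mclk_frequency_ratio(uint32_t mem_clock)
{
        uint32_t ratio;

        if (mem_clock <= 10000)
                return 0x0;

        ratio = (mem_clock - 10001) / 5000 + 1;         /* one step per 50 MHz */
        return (uint8_t)(ratio > 0xf ? 0xf : ratio);
}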
PECI_GetNumberOfActiveDisplays(hwmgr->pPECI, + * &(data->DisplayTiming.numExistingDisplays)); + */ + data->display_timing.num_existing_displays = 1; + + if (mclk_stutter_mode_threshold && + (clock <= mclk_stutter_mode_threshold) && + (!data->is_uvd_enabled) && + (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, + STUTTER_ENABLE) & 0x1)) + mem_level->StutterEnable = true; + + result = fiji_calculate_mclk_params(hwmgr, clock, mem_level); + if (!result) { + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage); + } + return result; +} + +static int fiji_populate_all_memory_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + int result; + /* populate MCLK dpm table to SMU7 */ + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, MemoryLevel); + uint32_t array_size = sizeof(SMU73_Discrete_MemoryLevel) * + SMU73_MAX_LEVELS_MEMORY; + struct SMU73_Discrete_MemoryLevel *levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), + "can not populate memory level as memory clock is zero", + return -EINVAL); + result = fiji_populate_single_memory_level(hwmgr, + dpm_table->mclk_table.dpm_levels[i].value, + &levels[i]); + if (result) + return result; + } + + /* Only enable level 0 for now. */ + levels[0].EnabledForActivity = 1; + + /* in order to prevent MC activity from stutter mode to push DPM up. + * the UVD change complements this by putting the MCLK in + * a higher state by default such that we are not effected by + * up threshold or and MCLK DPM latency. 
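/*
 * Editor's note (illustrative sketch, not part of the patch): the stutter
 * decision above reduces to a single predicate - the memory clock must be at
 * or below the 60000 (10 kHz units) threshold, UVD must be disabled, and the
 * DPG pipe stutter-enable bit must already read back as set.  As a
 * standalone helper with hypothetical parameters:
 */
#include <stdbool.h>
#include <stdint.h>

static bool mclk_stutter_allowed(uint32_t mclk, uint32_t threshold,
                                 bool uvd_enabled, bool dpg_stutter_bit_set)
{
        return threshold && mclk <= threshold && !uvd_enabled && dpg_stutter_bit_set;
}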
+ */ + levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target; + CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel); + + smu_data->smc_state_table.MemoryDpmLevelCount = + (uint8_t)dpm_table->mclk_table.count; + data->dpm_level_enable_mask.mclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + /* set highest level watermark to high */ + levels[dpm_table->mclk_table.count - 1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + +static int fiji_populate_mvdd_value(struct pp_hwmgr *hwmgr, + uint32_t mclk, SMIO_Pattern *smio_pat) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i = 0; + + if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { + /* find mvdd value which clock is more than request */ + for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { + if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { + smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value; + break; + } + } + PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, + "MVDD Voltage is outside the supported range.", + return -EINVAL); + } else + return -EINVAL; + + return 0; +} + +static int fiji_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, + SMU73_Discrete_DpmTable *table) +{ + int result = 0; + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct pp_atomctrl_clock_dividers_vi dividers; + SMIO_Pattern vol_level; + uint32_t mvdd; + uint16_t us_mvdd; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_2 = data->clock_registers.vCG_SPLL_FUNC_CNTL_2; + + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + if (!data->sclk_dpm_key_disabled) { + /* Get MinVoltage and Frequency from DPM0, + * already converted to SMC_UL */ + table->ACPILevel.SclkFrequency = + data->dpm_table.sclk_table.dpm_levels[0].value; + result = fiji_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, + table->ACPILevel.SclkFrequency, + (uint32_t *)(&table->ACPILevel.MinVoltage), &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDC voltage value " \ + "in Clock Dependency Table", + ); + } else { + table->ACPILevel.SclkFrequency = + data->vbios_boot_state.sclk_bootup_value; + table->ACPILevel.MinVoltage = + data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE; + } + + /* get the engine clock dividers for this clock value */ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, + table->ACPILevel.SclkFrequency, ÷rs); + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", + return result); + + table->ACPILevel.SclkDid = (uint8_t)dividers.pll_post_divider; + table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + table->ACPILevel.DeepSleepDivId = 0; + + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, + SPLL_PWRON, 0); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, + SPLL_RESET, 1); + spll_func_cntl_2 = PHM_SET_FIELD(spll_func_cntl_2, CG_SPLL_FUNC_CNTL_2, + SCLK_MUX_SEL, 4); + + table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; + table->ACPILevel.CgSpllFuncCntl2 = 
spll_func_cntl_2; + table->ACPILevel.CgSpllFuncCntl3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + table->ACPILevel.CgSpllFuncCntl4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; + table->ACPILevel.SpllSpreadSpectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + table->ACPILevel.SpllSpreadSpectrum2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); + + if (!data->mclk_dpm_key_disabled) { + /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ + table->MemoryACPILevel.MclkFrequency = + data->dpm_table.mclk_table.dpm_levels[0].value; + result = fiji_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, + table->MemoryACPILevel.MclkFrequency, + (uint32_t *)(&table->MemoryACPILevel.MinVoltage), &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDCI voltage value in Clock Dependency Table", + ); + } else { + table->MemoryACPILevel.MclkFrequency = + data->vbios_boot_state.mclk_bootup_value; + table->MemoryACPILevel.MinVoltage = + data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE; + } + + us_mvdd = 0; + if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) || + (data->mclk_dpm_key_disabled)) + us_mvdd = data->vbios_boot_state.mvdd_bootup_value; + else { + if (!fiji_populate_mvdd_value(hwmgr, + data->dpm_table.mclk_table.dpm_levels[0].value, + &vol_level)) + us_mvdd = vol_level.Voltage; + } + + table->MemoryACPILevel.MinMvdd = + PP_HOST_TO_SMC_UL(us_mvdd * VOLTAGE_SCALE); + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpHyst = 0; + table->MemoryACPILevel.DownHyst = 100; + table->MemoryACPILevel.VoltageDownHyst = 0; + table->MemoryACPILevel.ActivityLevel = + PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + + table->MemoryACPILevel.StutterEnable = false; + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage); + + return result; +} + +static int fiji_populate_smc_vce_level(struct pp_hwmgr *hwmgr, + SMU73_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + + table->VceLevelCount = (uint8_t)(mm_table->count); + table->VceBootLevel = 0; + + for (count = 0; count < table->VceLevelCount; count++) { + table->VceLevel[count].Frequency = mm_table->entries[count].eclk; + table->VceLevel[count].MinVoltage = 0; + table->VceLevel[count].MinVoltage |= + (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + table->VceLevel[count].MinVoltage |= + 
((mm_table->entries[count].vddc - VDDC_VDDCI_DELTA) * + VOLTAGE_SCALE) << VDDCI_SHIFT; + table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /*retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->VceLevel[count].Frequency, ÷rs); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for VCE engine clock", + return result); + + table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage); + } + return result; +} + +static int fiji_populate_smc_acp_level(struct pp_hwmgr *hwmgr, + SMU73_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + + table->AcpLevelCount = (uint8_t)(mm_table->count); + table->AcpBootLevel = 0; + + for (count = 0; count < table->AcpLevelCount; count++) { + table->AcpLevel[count].Frequency = mm_table->entries[count].aclk; + table->AcpLevel[count].MinVoltage |= (mm_table->entries[count].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + table->AcpLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - + VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->AcpLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->AcpLevel[count].Frequency, ÷rs); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for engine clock", return result); + + table->AcpLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->AcpLevel[count].Frequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->AcpLevel[count].MinVoltage); + } + return result; +} + +static int fiji_populate_smc_samu_level(struct pp_hwmgr *hwmgr, + SMU73_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + + table->SamuBootLevel = 0; + table->SamuLevelCount = (uint8_t)(mm_table->count); + + for (count = 0; count < table->SamuLevelCount; count++) { + /* not sure whether we need evclk or not */ + table->SamuLevel[count].MinVoltage = 0; + table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; + table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - + VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->SamuLevel[count].Frequency, ÷rs); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for samu clock", return result); + + table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); + } + return result; +} + +static int fiji_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, + int32_t eng_clock, int32_t mem_clock, + struct 
SMU73_Discrete_MCArbDramTimingTableEntry *arb_regs) +{ + uint32_t dram_timing; + uint32_t dram_timing2; + uint32_t burstTime; + ULONG state, trrds, trrdl; + int result; + + result = atomctrl_set_engine_dram_timings_rv770(hwmgr, + eng_clock, mem_clock); + PP_ASSERT_WITH_CODE(result == 0, + "Error calling VBIOS to set DRAM_TIMING.", return result); + + dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); + dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); + burstTime = cgs_read_register(hwmgr->device, mmMC_ARB_BURST_TIME); + + state = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, STATE0); + trrds = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, TRRDS0); + trrdl = PHM_GET_FIELD(burstTime, MC_ARB_BURST_TIME, TRRDL0); + + arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dram_timing); + arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2); + arb_regs->McArbBurstTime = (uint8_t)burstTime; + arb_regs->TRRDS = (uint8_t)trrds; + arb_regs->TRRDL = (uint8_t)trrdl; + + return 0; +} + +static int fiji_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct SMU73_Discrete_MCArbDramTimingTable arb_regs; + uint32_t i, j; + int result = 0; + + for (i = 0; i < data->dpm_table.sclk_table.count; i++) { + for (j = 0; j < data->dpm_table.mclk_table.count; j++) { + result = fiji_populate_memory_timing_parameters(hwmgr, + data->dpm_table.sclk_table.dpm_levels[i].value, + data->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + if (result) + break; + } + } + + if (!result) + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.arb_table_start, + (uint8_t *)&arb_regs, + sizeof(SMU73_Discrete_MCArbDramTimingTable), + SMC_RAM_END); + return result; +} + +static int fiji_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + + table->UvdLevelCount = (uint8_t)(mm_table->count); + table->UvdBootLevel = 0; + + for (count = 0; count < table->UvdLevelCount; count++) { + table->UvdLevel[count].MinVoltage = 0; + table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk; + table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; + table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - + VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->UvdLevel[count].VclkFrequency, &dividers); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for Vclk clock", return result); + + table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider; + + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->UvdLevel[count].DclkFrequency, &dividers); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for Dclk clock", return result); + + table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); +
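/* Illustrative aside, not part of the patch: each multimedia level above
 * (UVD/VCE/ACP/SAMU) packs its voltage requirement into one 32-bit
 * MinVoltage word before the byte swap for the SMC. A minimal sketch of the
 * packing, assuming the field layout given by the SMU7 shift macros
 * (VDDC in the low field, the derived VDDCI above it, phase count on top):
 *
 *   min_voltage  = (vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
 *   min_voltage |= ((vddc - VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT;
 *   min_voltage |= 1 << PHASES_SHIFT;            // single phase
 *   CONVERT_FROM_HOST_TO_SMC_UL(min_voltage);    // SMC expects big-endian
 *
 * The CONVERT_FROM_HOST_TO_SMC_* swap has to be applied exactly once per
 * field, after all host-side arithmetic on it is done.
 */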
CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); + + } + return result; +} + +static int fiji_populate_smc_boot_level(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_DpmTable *table) +{ + int result = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + /* find boot level from dpm table */ + result = phm_find_boot_level(&(data->dpm_table.sclk_table), + data->vbios_boot_state.sclk_bootup_value, + (uint32_t *)&(table->GraphicsBootLevel)); + + result = phm_find_boot_level(&(data->dpm_table.mclk_table), + data->vbios_boot_state.mclk_bootup_value, + (uint32_t *)&(table->MemoryBootLevel)); + + table->BootVddc = data->vbios_boot_state.vddc_bootup_value * + VOLTAGE_SCALE; + table->BootVddci = data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE; + table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc); + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci); + CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); + + return 0; +} + +static int fiji_populate_smc_initailial_state(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint8_t count, level; + + count = (uint8_t)(table_info->vdd_dep_on_sclk->count); + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_sclk->entries[level].clk >= + data->vbios_boot_state.sclk_bootup_value) { + smu_data->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + count = (uint8_t)(table_info->vdd_dep_on_mclk->count); + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_mclk->entries[level].clk >= + data->vbios_boot_state.mclk_bootup_value) { + smu_data->smc_state_table.MemoryBootLevel = level; + break; + } + } + + return 0; +} + +static int fiji_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) +{ + uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks, + volt_with_cks, value; + uint16_t clock_freq_u16; + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2, + volt_offset = 0; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + + stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; + + /* Read SMU_Eefuse to read and calculate RO and determine + * if the part is SS or FF. if RO >= 1660MHz, part is FF. 
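 * (Illustration only, not in the original patch: with the masks applied
 * below, efuse is the top byte of fuse dword 146 and efuse2 the low nibble
 * of dword 148. Assuming, say, efuse2 == 1 and efuse == 0x80 (128), the code
 * computes ro = (2300 - 1350) * 128 / 255 + 1350 = 1826, which is >= 1660,
 * so the part would be typed as FF, i.e. type 0.)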
+ */ + efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_EFUSE_0 + (146 * 4)); + efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_EFUSE_0 + (148 * 4)); + efuse &= 0xFF000000; + efuse = efuse >> 24; + efuse2 &= 0xF; + + if (efuse2 == 1) + ro = (2300 - 1350) * efuse / 255 + 1350; + else + ro = (2500 - 1000) * efuse / 255 + 1000; + + if (ro >= 1660) + type = 0; + else + type = 1; + + /* Populate Stretch amount */ + smu_data->smc_state_table.ClockStretcherAmount = stretch_amount; + + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ + for (i = 0; i < sclk_table->count; i++) { + smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= + sclk_table->entries[i].cks_enable << i; + volt_without_cks = (uint32_t)((14041 * + (sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 / + (4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000))); + volt_with_cks = (uint32_t)((13946 * + (sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 / + (3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000))); + if (volt_without_cks >= volt_with_cks) + volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); + smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; + } + + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + STRETCH_ENABLE, 0x0); + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + masterReset, 0x1); + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + staticEnable, 0x1); + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + masterReset, 0x0); + + /* Populate CKS Lookup Table */ + if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) + stretch_amount2 = 0; + else if (stretch_amount == 3 || stretch_amount == 4) + stretch_amount2 = 1; + else { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher); + PP_ASSERT_WITH_CODE(false, + "Stretch Amount in PPTable not supported\n", + return -EINVAL); + } + + value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixPWR_CKS_CNTL); + value &= 0xFFC2FF87; + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq = + fiji_clock_stretcher_lookup_table[stretch_amount2][0]; + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq = + fiji_clock_stretcher_lookup_table[stretch_amount2][1]; + clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(smu_data->smc_state_table. + GraphicsLevel[smu_data->smc_state_table.GraphicsDpmLevelCount - 1]. + SclkFrequency) / 100); + if (fiji_clock_stretcher_lookup_table[stretch_amount2][0] < + clock_freq_u16 && + fiji_clock_stretcher_lookup_table[stretch_amount2][1] > + clock_freq_u16) { + /* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */ + value |= (fiji_clock_stretcher_lookup_table[stretch_amount2][3]) << 16; + /* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */ + value |= (fiji_clock_stretcher_lookup_table[stretch_amount2][2]) << 18; + /* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */ + value |= (fiji_clock_stretch_amount_conversion + [fiji_clock_stretcher_lookup_table[stretch_amount2][3]] + [stretch_amount]) << 3; + } + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. + CKS_LOOKUPTableEntry[0].minFreq); + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. 
+ CKS_LOOKUPTableEntry[0].maxFreq); + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting = + fiji_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F; + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |= + (fiji_clock_stretcher_lookup_table[stretch_amount2][3]) << 7; + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixPWR_CKS_CNTL, value); + + /* Populate DDT Lookup Table */ + for (i = 0; i < 4; i++) { + /* Assign the minimum and maximum VID stored + * in the last row of Clock Stretcher Voltage Table. + */ + smu_data->smc_state_table.ClockStretcherDataTable. + ClockStretcherDataTableEntry[i].minVID = + (uint8_t) fiji_clock_stretcher_ddt_table[type][i][2]; + smu_data->smc_state_table.ClockStretcherDataTable. + ClockStretcherDataTableEntry[i].maxVID = + (uint8_t) fiji_clock_stretcher_ddt_table[type][i][3]; + /* Loop through each SCLK and check the frequency + * to see if it lies within the frequency for clock stretcher. + */ + for (j = 0; j < smu_data->smc_state_table.GraphicsDpmLevelCount; j++) { + cks_setting = 0; + clock_freq = PP_SMC_TO_HOST_UL( + smu_data->smc_state_table.GraphicsLevel[j].SclkFrequency); + /* Check the allowed frequency against the sclk level[j]. + * Sclk's endianness has already been converted, + * and it's in 10Khz unit, + * as opposed to Data table, which is in Mhz unit. + */ + if (clock_freq >= + (fiji_clock_stretcher_ddt_table[type][i][0]) * 100) { + cks_setting |= 0x2; + if (clock_freq < + (fiji_clock_stretcher_ddt_table[type][i][1]) * 100) + cks_setting |= 0x1; + } + smu_data->smc_state_table.ClockStretcherDataTable. + ClockStretcherDataTableEntry[i].setting |= cks_setting << (j * 2); + } + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table. + ClockStretcherDataTable. 
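/* Illustrative aside, not part of the patch: each graphics DPM level j
 * contributes a 2-bit code to this per-entry setting word. Bit (j*2 + 1) is
 * set when the level's SCLK (in 10 kHz, already byte-swapped back above)
 * reaches the entry's lower MHz bound, and bit (j*2) is additionally set
 * while it stays below the upper bound, roughly:
 *
 *   if (sclk_10khz >= min_mhz * 100) {
 *           cks |= 0x2;
 *           if (sclk_10khz < max_mhz * 100)
 *                   cks |= 0x1;
 *   }
 *   setting |= cks << (j * 2);
 *
 * min_mhz/max_mhz stand for fiji_clock_stretcher_ddt_table[type][i][0..1];
 * the names are placeholders used only for this sketch.
 */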
+ ClockStretcherDataTableEntry[i].setting); + } + + value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); + value &= 0xFFFFFFFE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); + + return 0; +} + +static int fiji_populate_vr_config(struct pp_hwmgr *hwmgr, + struct SMU73_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint16_t config; + + config = VR_MERGED_WITH_VDDC; + table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT); + + /* Set Vddc Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + config = VR_SVI2_PLANE_1; + table->VRConfig |= config; + } else { + PP_ASSERT_WITH_CODE(false, + "VDDC should be on SVI2 control in merged mode!", + ); + } + /* Set Vddci Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { + config = VR_SVI2_PLANE_2; /* only in merged mode */ + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + config = VR_SMIO_PATTERN_1; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } + /* Set Mvdd Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) { + config = VR_SVI2_PLANE_2; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + config = VR_SMIO_PATTERN_2; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + } + + return 0; +} + +static int fiji_init_arb_table_index(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + uint32_t tmp; + int result; + + /* This is a read-modify-write on the first byte of the ARB table. + * The first byte in the SMU73_Discrete_MCArbDramTimingTable structure + * is the field 'current'. + * This solution is ugly, but we never write the whole table only + * individual fields in it. + * In reality this field should not be in that structure + * but in a soft register. + */ + result = smu7_read_smc_sram_dword(hwmgr, + smu_data->smu7_data.arb_table_start, &tmp, SMC_RAM_END); + + if (result) + return result; + + tmp &= 0x00FFFFFF; + tmp |= ((uint32_t)MC_CG_ARB_FREQ_F1) << 24; + + return smu7_write_smc_sram_dword(hwmgr, + smu_data->smu7_data.arb_table_start, tmp, SMC_RAM_END); +} + +static int fiji_save_default_power_profile(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct SMU73_Discrete_GraphicsLevel *levels = + data->smc_state_table.GraphicsLevel; + unsigned min_level = 1; + + hwmgr->default_gfx_power_profile.activity_threshold = + be16_to_cpu(levels[0].ActivityLevel); + hwmgr->default_gfx_power_profile.up_hyst = levels[0].UpHyst; + hwmgr->default_gfx_power_profile.down_hyst = levels[0].DownHyst; + hwmgr->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; + + hwmgr->default_compute_power_profile = hwmgr->default_gfx_power_profile; + hwmgr->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; + + /* Workaround compute SDMA instability: disable lowest SCLK + * DPM level. 
Optimize compute power profile: Use only highest + * 2 power levels (if more than 2 are available), Hysteresis: + * 0ms up, 5ms down + */ + if (data->smc_state_table.GraphicsDpmLevelCount > 2) + min_level = data->smc_state_table.GraphicsDpmLevelCount - 2; + else if (data->smc_state_table.GraphicsDpmLevelCount == 2) + min_level = 1; + else + min_level = 0; + hwmgr->default_compute_power_profile.min_sclk = + be32_to_cpu(levels[min_level].SclkFrequency); + hwmgr->default_compute_power_profile.up_hyst = 0; + hwmgr->default_compute_power_profile.down_hyst = 5; + + hwmgr->gfx_power_profile = hwmgr->default_gfx_power_profile; + hwmgr->compute_power_profile = hwmgr->default_compute_power_profile; + + return 0; +} + +static int fiji_setup_dpm_led_config(struct pp_hwmgr *hwmgr) +{ + pp_atomctrl_voltage_table param_led_dpm; + int result = 0; + u32 mask = 0; + + result = atomctrl_get_voltage_table_v3(hwmgr, + VOLTAGE_TYPE_LEDDPM, VOLTAGE_OBJ_GPIO_LUT, + &param_led_dpm); + if (result == 0) { + int i, j; + u32 tmp = param_led_dpm.mask_low; + + for (i = 0, j = 0; i < 32; i++) { + if (tmp & 1) { + mask |= (i << (8 * j)); + if (++j >= 3) + break; + } + tmp >>= 1; + } + } + if (mask) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_LedConfig, + mask); + return 0; +} + +static int fiji_init_smc_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct SMU73_Discrete_DpmTable *table = &(smu_data->smc_state_table); + uint8_t i; + struct pp_atomctrl_gpio_pin_assignment gpio_pin; + + fiji_initialize_power_tune_defaults(hwmgr); + + if (SMU7_VOLTAGE_CONTROL_NONE != data->voltage_control) + fiji_populate_smc_voltage_tables(hwmgr, table); + + table->SystemFlags = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StepVddc)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (data->is_memory_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + if (data->ulv_supported && table_info->us_ulv_voltage_offset) { + result = fiji_populate_ulv_state(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ULV state!", return result); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_ULV_PARAMETER, 0x40035); + } + + result = fiji_populate_smc_link_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Link Level!", return result); + + result = fiji_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Graphics Level!", return result); + + result = fiji_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Memory Level!", return result); + + result = fiji_populate_smc_acpi_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ACPI Level!", return result); + + result = fiji_populate_smc_vce_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize VCE Level!", return result); + + result = fiji_populate_smc_acp_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ACP Level!", return result); + + result = fiji_populate_smc_samu_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 ==
result, + "Failed to initialize SAMU Level!", return result); + + /* Since only the initial state is completely set up at this point + * (the other states are just copies of the boot state) we only + * need to populate the ARB settings for the initial state. + */ + result = fiji_program_memory_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to Write ARB settings for the initial state.", return result); + + result = fiji_populate_smc_uvd_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize UVD Level!", return result); + + result = fiji_populate_smc_boot_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Boot Level!", return result); + + result = fiji_populate_smc_initailial_state(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Boot State!", return result); + + result = fiji_populate_bapm_parameters_in_dpm_table(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate BAPM Parameters!", return result); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher)) { + result = fiji_populate_clock_stretcher_data_table(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate Clock Stretcher Data Table!", + return result); + } + + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = + table_info->cac_dtp_table->usTargetOperatingTemp * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->TemperatureLimitLow = + (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + table->PCIeBootLinkLevel = 0; /* 0:Gen1 1:Gen2 2:Gen3*/ + table->PCIeGenInterval = 1; + table->VRConfig = 0; + + result = fiji_populate_vr_config(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate VRConfig setting!", return result); + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID, &gpio_pin)) { + table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } else { + table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } + + if (atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID, + &gpio_pin)) { + table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } else { + table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } + + /* Thermal Output GPIO */ + if (atomctrl_get_pp_assign_pin(hwmgr, THERMAL_INT_OUTPUT_GPIO_PINID, + &gpio_pin)) { + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalOutGPIO); + + table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift; + + /* For porlarity read GPIOPAD_A with assigned Gpio pin + * since VBIOS will program this register to set 'inactive state', + * driver can then determine 'active state' from this and + * program SMU with correct polarity + */ + table->ThermOutPolarity = (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) & + (1 << 
gpio_pin.uc_gpio_pin_bit_shift))) ? 1:0; + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY; + + /* if required, combine VRHot/PCC with thermal out GPIO */ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot) && + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_CombinePCCWithThermalSignal)) + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT; + } else { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalOutGPIO); + table->ThermOutGpio = 17; + table->ThermOutPolarity = 1; + table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE; + } + + for (i = 0; i < SMU73_MAX_ENTRIES_SMIO; i++) + table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); + + CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); + CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2); + CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); + CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); + CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); + + /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ + result = smu7_copy_bytes_to_smc(hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, SystemFlags), + (uint8_t *)&(table->SystemFlags), + sizeof(SMU73_Discrete_DpmTable) - 3 * sizeof(SMU73_PIDController), + SMC_RAM_END); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to upload dpm data to SMC memory!", return result); + + result = fiji_init_arb_table_index(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to upload arb data to SMC memory!", return result); + + result = fiji_populate_pm_fuses(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate PM fuses to SMC memory!", return result); + + result = fiji_setup_dpm_led_config(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to setup dpm led config", return result); + + fiji_save_default_power_profile(hwmgr); + + return 0; +} + +static int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + + SMU73_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; + uint32_t duty100; + uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; + uint16_t fdo_min, slope1, slope2; + uint32_t reference_clock; + int res; + uint64_t tmp64; + + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + if (smu_data->smu7_data.fan_table_start == 0) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + CG_FDO_CTRL1, FMAX_DUTY100); + + if (duty100 == 0) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + tmp64 = hwmgr->thermal_controller.advanceFanControlParameters. 
+ usPWMMin * duty100; + do_div(tmp64, 10000); + fdo_min = (uint16_t)tmp64; + + t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - + hwmgr->thermal_controller.advanceFanControlParameters.usTMin; + t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - + hwmgr->thermal_controller.advanceFanControlParameters.usTMed; + + pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; + pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; + + slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); + slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); + + fan_table.TempMin = cpu_to_be16((50 + hwmgr-> + thermal_controller.advanceFanControlParameters.usTMin) / 100); + fan_table.TempMed = cpu_to_be16((50 + hwmgr-> + thermal_controller.advanceFanControlParameters.usTMed) / 100); + fan_table.TempMax = cpu_to_be16((50 + hwmgr-> + thermal_controller.advanceFanControlParameters.usTMax) / 100); + + fan_table.Slope1 = cpu_to_be16(slope1); + fan_table.Slope2 = cpu_to_be16(slope2); + + fan_table.FdoMin = cpu_to_be16(fdo_min); + + fan_table.HystDown = cpu_to_be16(hwmgr-> + thermal_controller.advanceFanControlParameters.ucTHyst); + + fan_table.HystUp = cpu_to_be16(1); + + fan_table.HystSlope = cpu_to_be16(1); + + fan_table.TempRespLim = cpu_to_be16(5); + + reference_clock = smu7_get_xclk(hwmgr); + + fan_table.RefreshPeriod = cpu_to_be32((hwmgr-> + thermal_controller.advanceFanControlParameters.ulCycleDelay * + reference_clock) / 1600); + + fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); + + fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD( + hwmgr->device, CGS_IND_REG__SMC, + CG_MULT_THERMAL_CTRL, TEMP_SEL); + + res = smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.fan_table_start, + (uint8_t *)&fan_table, (uint32_t)sizeof(fan_table), + SMC_RAM_END); + + if (!res && hwmgr->thermal_controller. + advanceFanControlParameters.ucMinimumPWMLimit) + res = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetFanMinPwm, + hwmgr->thermal_controller. + advanceFanControlParameters.ucMinimumPWMLimit); + + if (!res && hwmgr->thermal_controller. + advanceFanControlParameters.ulMinFanSCLKAcousticLimit) + res = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetFanSclkTarget, + hwmgr->thermal_controller. 
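/* Illustrative aside, not part of the patch: the fan table above is fixed
 * point. Temperatures from the thermal controller appear to be in 0.01 degC,
 * so (50 + usTMin) / 100 is a round-to-nearest conversion to whole degrees,
 * and the factor 16 in
 *
 *   slope = (50 + (16 * duty100 * pwm_diff) / t_diff) / 100
 *
 * keeps four fractional bits of duty-per-degree. Likewise fdo_min maps a
 * minimum PWM given in 0.01 percent onto the 0..duty100 hardware range;
 * assuming, for example, usPWMMin = 1000 (10.00%) and duty100 = 255, that is
 * 1000 * 255 / 10000 = 25.
 */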
+ advanceFanControlParameters.ulMinFanSCLKAcousticLimit); + + if (res) + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + + return 0; +} + + +static int fiji_thermal_avfs_enable(struct pp_hwmgr *hwmgr) +{ + int ret; + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); + + if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS) + return 0; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs); + + if (!ret) + /* If this param is not changed, this function could fire unnecessarily */ + smu_data->avfs.avfs_btc_status = AVFS_BTC_COMPLETED_PREVIOUSLY; + + return ret; +} + +static int fiji_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) + return fiji_program_memory_timing_parameters(hwmgr); + + return 0; +} + +static int fiji_update_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + + int result = 0; + uint32_t low_sclk_interrupt_threshold = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification) + && (hwmgr->gfx_arbiter.sclk_threshold != + data->low_sclk_interrupt_threshold)) { + data->low_sclk_interrupt_threshold = + hwmgr->gfx_arbiter.sclk_threshold; + low_sclk_interrupt_threshold = + data->low_sclk_interrupt_threshold; + + CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, + LowSclkInterruptThreshold), + (uint8_t *)&low_sclk_interrupt_threshold, + sizeof(uint32_t), + SMC_RAM_END); + } + result = fiji_program_mem_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE((result == 0), + "Failed to program memory timing parameters!", + ); + return result; +} + +static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member) +{ + switch (type) { + case SMU_SoftRegisters: + switch (member) { + case HandshakeDisables: + return offsetof(SMU73_SoftRegisters, HandshakeDisables); + case VoltageChangeTimeout: + return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout); + case AverageGraphicsActivity: + return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity); + case PreVBlankGap: + return offsetof(SMU73_SoftRegisters, PreVBlankGap); + case VBlankTimeout: + return offsetof(SMU73_SoftRegisters, VBlankTimeout); + case UcodeLoadStatus: + return offsetof(SMU73_SoftRegisters, UcodeLoadStatus); + case DRAM_LOG_ADDR_H: + return offsetof(SMU73_SoftRegisters, DRAM_LOG_ADDR_H); + case DRAM_LOG_ADDR_L: + return offsetof(SMU73_SoftRegisters, DRAM_LOG_ADDR_L); + case DRAM_LOG_PHY_ADDR_H: + return offsetof(SMU73_SoftRegisters, DRAM_LOG_PHY_ADDR_H); + case DRAM_LOG_PHY_ADDR_L: + return offsetof(SMU73_SoftRegisters, DRAM_LOG_PHY_ADDR_L); + case DRAM_LOG_BUFF_SIZE: + return offsetof(SMU73_SoftRegisters, DRAM_LOG_BUFF_SIZE); + } + case SMU_Discrete_DpmTable: + switch (member) { + case UvdBootLevel: + return offsetof(SMU73_Discrete_DpmTable, UvdBootLevel); + case VceBootLevel: + return offsetof(SMU73_Discrete_DpmTable, VceBootLevel); + case SamuBootLevel: + return offsetof(SMU73_Discrete_DpmTable, SamuBootLevel); + case LowSclkInterruptThreshold: + return offsetof(SMU73_Discrete_DpmTable, LowSclkInterruptThreshold); + } + } + pr_warn("can't get the 
offset of type %x member %x\n", type, member); + return 0; +} + +static uint32_t fiji_get_mac_definition(uint32_t value) +{ + switch (value) { + case SMU_MAX_LEVELS_GRAPHICS: + return SMU73_MAX_LEVELS_GRAPHICS; + case SMU_MAX_LEVELS_MEMORY: + return SMU73_MAX_LEVELS_MEMORY; + case SMU_MAX_LEVELS_LINK: + return SMU73_MAX_LEVELS_LINK; + case SMU_MAX_ENTRIES_SMIO: + return SMU73_MAX_ENTRIES_SMIO; + case SMU_MAX_LEVELS_VDDC: + return SMU73_MAX_LEVELS_VDDC; + case SMU_MAX_LEVELS_VDDGFX: + return SMU73_MAX_LEVELS_VDDGFX; + case SMU_MAX_LEVELS_VDDCI: + return SMU73_MAX_LEVELS_VDDCI; + case SMU_MAX_LEVELS_MVDD: + return SMU73_MAX_LEVELS_MVDD; + } + + pr_warn("can't get the mac of %x\n", value); + return 0; +} + + +static int fiji_update_uvd_smc_table(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + smu_data->smc_state_table.UvdBootLevel = 0; + if (table_info->mm_dep_table->count > 0) + smu_data->smc_state_table.UvdBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU73_Discrete_DpmTable, + UvdBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0x00FFFFFF; + mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UVDDPM) || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_UVDDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); + return 0; +} + +static int fiji_update_vce_smc_table(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smu_data->smc_state_table.VceBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + else + smu_data->smc_state_table.VceBootLevel = 0; + + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, VceBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFF00FFFF; + mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_VCEDPM_SetEnabledMask, + (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); + return 0; +} + +static int fiji_update_samu_smc_table(struct pp_hwmgr *hwmgr) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + + + smu_data->smc_state_table.SamuBootLevel 
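/* Illustrative aside, not part of the patch: the UVD/VCE/SAMU boot levels
 * are single bytes inside the SMC copy of the DPM table, so each of these
 * update helpers aligns the field offset down to its containing dword, reads
 * that dword through the indirect SMC register space, and rewrites only its
 * own byte lane (UVD: mask 0x00FFFFFF, shift 24; VCE: 0xFF00FFFF, shift 16;
 * SAMU below: 0xFFFFFF00, shift 0). The common pattern is:
 *
 *   offset &= ~3;   // same effect as the divide/multiply by 4 above
 *   val = cgs_read_ind_register(dev, CGS_IND_REG__SMC, offset);
 *   val = (val & lane_mask) | (boot_level << lane_shift);
 *   cgs_write_ind_register(dev, CGS_IND_REG__SMC, offset, val);
 *
 * lane_mask and lane_shift stand in for the per-field constants and are
 * placeholders used only for this sketch.
 */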
= 0; + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, SamuBootLevel); + + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFFFFFF00; + mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); + return 0; +} + +static int fiji_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) +{ + switch (type) { + case SMU_UVD_TABLE: + fiji_update_uvd_smc_table(hwmgr); + break; + case SMU_VCE_TABLE: + fiji_update_vce_smc_table(hwmgr); + break; + case SMU_SAMU_TABLE: + fiji_update_samu_smc_table(hwmgr); + break; + default: + break; + } + return 0; +} + +static int fiji_process_firmware_header(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); + uint32_t tmp; + int result; + bool error = false; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU73_Firmware_Header, DpmTable), + &tmp, SMC_RAM_END); + + if (0 == result) + smu_data->smu7_data.dpm_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU73_Firmware_Header, SoftRegisters), + &tmp, SMC_RAM_END); + + if (!result) { + data->soft_regs_start = tmp; + smu_data->smu7_data.soft_regs_start = tmp; + } + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU73_Firmware_Header, mcRegisterTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.mc_reg_table_start = tmp; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU73_Firmware_Header, FanTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.fan_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU73_Firmware_Header, mcArbDramTimingTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.arb_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU73_Firmware_Header, Version), + &tmp, SMC_RAM_END); + + if (!result) + hwmgr->microcode_version_info.SMC = tmp; + + error |= (0 != result); + + return error ? 
-1 : 0; +} + +static int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + + /* Program additional LP registers + * that are no longer programmed by VBIOS + */ + cgs_write_register(hwmgr->device, mmMC_SEQ_RAS_TIMING_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_RAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_CAS_TIMING_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_CAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_TIMING_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_TIMING)); + + return 0; +} + +static bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr) +{ + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) + ? true : false; +} + +static int fiji_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, + struct amd_pp_profile *request) +{ + struct fiji_smumgr *smu_data = (struct fiji_smumgr *) + (hwmgr->smu_backend); + struct SMU73_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU73_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU73_Discrete_GraphicsLevel) * + SMU73_MAX_LEVELS_GRAPHICS; + uint32_t i; + + for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { + levels[i].ActivityLevel = + cpu_to_be16(request->activity_threshold); + levels[i].EnabledForActivity = 1; + levels[i].UpHyst = request->up_hyst; + levels[i].DownHyst = request->down_hyst; + } + + return smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + array_size, SMC_RAM_END); +} const struct pp_smumgr_func fiji_smu_funcs = { .smu_init = &fiji_smu_init, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h index 175bf9f8ef9c..279647772578 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h @@ -28,6 +28,15 @@ #include "smu7_smumgr.h" +struct fiji_pt_defaults { + uint8_t SviLoadLineEn; + uint8_t SviLoadLineVddC; + uint8_t TDC_VDDC_ThrottleReleaseLimitPerc; + uint8_t TDC_MAWt; + uint8_t TdcWaterfallCtl; + uint8_t DTEAmbientTempBase; +}; + struct fiji_smumgr { struct smu7_smumgr smu7_data; struct SMU73_Discrete_DpmTable smc_state_table; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c deleted file mode 100644 index efb0fc033274..000000000000 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c +++ /dev/null @@ -1,2568 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * - */ - -#include "pp_debug.h" -#include "iceland_smc.h" -#include "smu7_dyn_defaults.h" - -#include "smu7_hwmgr.h" -#include "hardwaremanager.h" -#include "ppatomctrl.h" -#include "cgs_common.h" -#include "atombios.h" -#include "pppcielanes.h" -#include "pp_endian.h" -#include "smu7_ppsmc.h" - -#include "smu71_discrete.h" - -#include "smu/smu_7_1_1_d.h" -#include "smu/smu_7_1_1_sh_mask.h" - -#include "gmc/gmc_8_1_d.h" -#include "gmc/gmc_8_1_sh_mask.h" - -#include "bif/bif_5_0_d.h" -#include "bif/bif_5_0_sh_mask.h" - -#include "dce/dce_10_0_d.h" -#include "dce/dce_10_0_sh_mask.h" -#include "processpptables.h" - -#include "iceland_smumgr.h" - -#define VOLTAGE_SCALE 4 -#define POWERTUNE_DEFAULT_SET_MAX 1 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 -#define MC_CG_ARB_FREQ_F1 0x0b -#define VDDC_VDDCI_DELTA 200 - -#define DEVICE_ID_VI_ICELAND_M_6900 0x6900 -#define DEVICE_ID_VI_ICELAND_M_6901 0x6901 -#define DEVICE_ID_VI_ICELAND_M_6902 0x6902 -#define DEVICE_ID_VI_ICELAND_M_6903 0x6903 - -static const struct iceland_pt_defaults defaults_iceland = { - /* - * sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, - * TDC_MAWt, TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT - */ - 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, - { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61 }, - { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } -}; - -/* 35W - XT, XTL */ -static const struct iceland_pt_defaults defaults_icelandxt = { - /* - * sviLoadLIneEn, SviLoadLineVddC, - * TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, - * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, - * BAPM_TEMP_GRADIENT - */ - 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x0, - { 0xA7, 0x0, 0x0, 0xB5, 0x0, 0x0, 0x9F, 0x0, 0x0, 0xD6, 0x0, 0x0, 0xD7, 0x0, 0x0}, - { 0x1EA, 0x0, 0x0, 0x224, 0x0, 0x0, 0x25E, 0x0, 0x0, 0x28E, 0x0, 0x0, 0x2AB, 0x0, 0x0} -}; - -/* 25W - PRO, LE */ -static const struct iceland_pt_defaults defaults_icelandpro = { - /* - * sviLoadLIneEn, SviLoadLineVddC, - * TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, - * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, - * BAPM_TEMP_GRADIENT - */ - 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x0, - { 0xB7, 0x0, 0x0, 0xC3, 0x0, 0x0, 0xB5, 0x0, 0x0, 0xEA, 0x0, 0x0, 0xE6, 0x0, 0x0}, - { 0x1EA, 0x0, 0x0, 0x224, 0x0, 0x0, 0x25E, 0x0, 0x0, 0x28E, 0x0, 0x0, 0x2AB, 0x0, 0x0} -}; - -static void iceland_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) -{ - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - struct cgs_system_info sys_info = {0}; - uint32_t dev_id; - - sys_info.size = sizeof(struct cgs_system_info); - sys_info.info_id = CGS_SYSTEM_INFO_PCIE_DEV; - cgs_query_system_info(hwmgr->device, &sys_info); - dev_id = (uint32_t)sys_info.value; - - switch (dev_id) { - case 
DEVICE_ID_VI_ICELAND_M_6900: - case DEVICE_ID_VI_ICELAND_M_6903: - smu_data->power_tune_defaults = &defaults_icelandxt; - break; - - case DEVICE_ID_VI_ICELAND_M_6901: - case DEVICE_ID_VI_ICELAND_M_6902: - smu_data->power_tune_defaults = &defaults_icelandpro; - break; - default: - smu_data->power_tune_defaults = &defaults_iceland; - pr_warn("Unknown V.I. Device ID.\n"); - break; - } - return; -} - -static int iceland_populate_svi_load_line(struct pp_hwmgr *hwmgr) -{ - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; - - smu_data->power_tune_table.SviLoadLineEn = defaults->svi_load_line_en; - smu_data->power_tune_table.SviLoadLineVddC = defaults->svi_load_line_vddc; - smu_data->power_tune_table.SviLoadLineTrimVddC = 3; - smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; - - return 0; -} - -static int iceland_populate_tdc_limit(struct pp_hwmgr *hwmgr) -{ - uint16_t tdc_limit; - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; - - tdc_limit = (uint16_t)(hwmgr->dyn_state.cac_dtp_table->usTDC * 256); - smu_data->power_tune_table.TDC_VDDC_PkgLimit = - CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); - smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = - defaults->tdc_vddc_throttle_release_limit_perc; - smu_data->power_tune_table.TDC_MAWt = defaults->tdc_mawt; - - return 0; -} - -static int iceland_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) -{ - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; - uint32_t temp; - - if (smu7_read_smc_sram_dword(hwmgr, - fuse_table_offset + - offsetof(SMU71_Discrete_PmFuses, TdcWaterfallCtl), - (uint32_t *)&temp, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", - return -EINVAL); - else - smu_data->power_tune_table.TdcWaterfallCtl = defaults->tdc_waterfall_ctl; - - return 0; -} - -static int iceland_populate_temperature_scaler(struct pp_hwmgr *hwmgr) -{ - return 0; -} - -static int iceland_populate_gnb_lpml(struct pp_hwmgr *hwmgr) -{ - int i; - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - - /* Currently not used. Set all to zero. 
*/ - for (i = 0; i < 8; i++) - smu_data->power_tune_table.GnbLPML[i] = 0; - - return 0; -} - -static int iceland_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) -{ - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - uint16_t HiSidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; - uint16_t LoSidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; - struct phm_cac_tdp_table *cac_table = hwmgr->dyn_state.cac_dtp_table; - - HiSidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); - LoSidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); - - smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = - CONVERT_FROM_HOST_TO_SMC_US(HiSidd); - smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = - CONVERT_FROM_HOST_TO_SMC_US(LoSidd); - - return 0; -} - -static int iceland_populate_bapm_vddc_vid_sidd(struct pp_hwmgr *hwmgr) -{ - int i; - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - uint8_t *hi_vid = smu_data->power_tune_table.BapmVddCVidHiSidd; - uint8_t *lo_vid = smu_data->power_tune_table.BapmVddCVidLoSidd; - - PP_ASSERT_WITH_CODE(NULL != hwmgr->dyn_state.cac_leakage_table, - "The CAC Leakage table does not exist!", return -EINVAL); - PP_ASSERT_WITH_CODE(hwmgr->dyn_state.cac_leakage_table->count <= 8, - "There should never be more than 8 entries for BapmVddcVid!!!", return -EINVAL); - PP_ASSERT_WITH_CODE(hwmgr->dyn_state.cac_leakage_table->count == hwmgr->dyn_state.vddc_dependency_on_sclk->count, - "CACLeakageTable->count and VddcDependencyOnSCLk->count not equal", return -EINVAL); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EVV)) { - for (i = 0; (uint32_t) i < hwmgr->dyn_state.cac_leakage_table->count; i++) { - lo_vid[i] = convert_to_vid(hwmgr->dyn_state.cac_leakage_table->entries[i].Vddc1); - hi_vid[i] = convert_to_vid(hwmgr->dyn_state.cac_leakage_table->entries[i].Vddc2); - } - } else { - PP_ASSERT_WITH_CODE(false, "Iceland should always support EVV", return -EINVAL); - } - - return 0; -} - -static int iceland_populate_vddc_vid(struct pp_hwmgr *hwmgr) -{ - int i; - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - uint8_t *vid = smu_data->power_tune_table.VddCVid; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - PP_ASSERT_WITH_CODE(data->vddc_voltage_table.count <= 8, - "There should never be more than 8 entries for VddcVid!!!", - return -EINVAL); - - for (i = 0; i < (int)data->vddc_voltage_table.count; i++) { - vid[i] = convert_to_vid(data->vddc_voltage_table.entries[i].value); - } - - return 0; -} - - - -static int iceland_populate_pm_fuses(struct pp_hwmgr *hwmgr) -{ - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - uint32_t pm_fuse_table_offset; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment)) { - if (smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, PmFuseTable), - &pm_fuse_table_offset, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to get pm_fuse_table_offset Failed!", - return -EINVAL); - - /* DW0 - DW3 */ - if (iceland_populate_bapm_vddc_vid_sidd(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate bapm vddc vid Failed!", - return -EINVAL); - - /* DW4 - DW5 */ - if (iceland_populate_vddc_vid(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate vddc vid Failed!", - return -EINVAL); - - /* DW6 */ - if 
(iceland_populate_svi_load_line(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate SviLoadLine Failed!", - return -EINVAL); - /* DW7 */ - if (iceland_populate_tdc_limit(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TDCLimit Failed!", return -EINVAL); - /* DW8 */ - if (iceland_populate_dw8(hwmgr, pm_fuse_table_offset)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TdcWaterfallCtl, " - "LPMLTemperature Min and Max Failed!", - return -EINVAL); - - /* DW9-DW12 */ - if (0 != iceland_populate_temperature_scaler(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate LPMLTemperatureScaler Failed!", - return -EINVAL); - - /* DW13-DW16 */ - if (iceland_populate_gnb_lpml(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate GnbLPML Failed!", - return -EINVAL); - - /* DW18 */ - if (iceland_populate_bapm_vddc_base_leakage_sidd(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate BapmVddCBaseLeakage Hi and Lo Sidd Failed!", - return -EINVAL); - - if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, - (uint8_t *)&smu_data->power_tune_table, - sizeof(struct SMU71_Discrete_PmFuses), SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to download PmFuseTable Failed!", - return -EINVAL); - } - return 0; -} - -static int iceland_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, - struct phm_clock_voltage_dependency_table *allowed_clock_voltage_table, - uint32_t clock, uint32_t *vol) -{ - uint32_t i = 0; - - /* clock - voltage dependency table is empty table */ - if (allowed_clock_voltage_table->count == 0) - return -EINVAL; - - for (i = 0; i < allowed_clock_voltage_table->count; i++) { - /* find first sclk bigger than request */ - if (allowed_clock_voltage_table->entries[i].clk >= clock) { - *vol = allowed_clock_voltage_table->entries[i].v; - return 0; - } - } - - /* sclk is bigger than max sclk in the dependence table */ - *vol = allowed_clock_voltage_table->entries[i - 1].v; - - return 0; -} - -static int iceland_get_std_voltage_value_sidd(struct pp_hwmgr *hwmgr, - pp_atomctrl_voltage_table_entry *tab, uint16_t *hi, - uint16_t *lo) -{ - uint16_t v_index; - bool vol_found = false; - *hi = tab->value * VOLTAGE_SCALE; - *lo = tab->value * VOLTAGE_SCALE; - - /* SCLK/VDDC Dependency Table has to exist. */ - PP_ASSERT_WITH_CODE(NULL != hwmgr->dyn_state.vddc_dependency_on_sclk, - "The SCLK/VDDC Dependency Table does not exist.\n", - return -EINVAL); - - if (NULL == hwmgr->dyn_state.cac_leakage_table) { - pr_warn("CAC Leakage Table does not exist, using vddc.\n"); - return 0; - } - - /* - * Since voltage in the sclk/vddc dependency table is not - * necessarily in ascending order because of ELB voltage - * patching, loop through entire list to find exact voltage. 
- */ - for (v_index = 0; (uint32_t)v_index < hwmgr->dyn_state.vddc_dependency_on_sclk->count; v_index++) { - if (tab->value == hwmgr->dyn_state.vddc_dependency_on_sclk->entries[v_index].v) { - vol_found = true; - if ((uint32_t)v_index < hwmgr->dyn_state.cac_leakage_table->count) { - *lo = hwmgr->dyn_state.cac_leakage_table->entries[v_index].Vddc * VOLTAGE_SCALE; - *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[v_index].Leakage * VOLTAGE_SCALE); - } else { - pr_warn("Index from SCLK/VDDC Dependency Table exceeds the CAC Leakage Table index, using maximum index from CAC table.\n"); - *lo = hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Vddc * VOLTAGE_SCALE; - *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Leakage * VOLTAGE_SCALE); - } - break; - } - } - - /* - * If voltage is not found in the first pass, loop again to - * find the best match, equal or higher value. - */ - if (!vol_found) { - for (v_index = 0; (uint32_t)v_index < hwmgr->dyn_state.vddc_dependency_on_sclk->count; v_index++) { - if (tab->value <= hwmgr->dyn_state.vddc_dependency_on_sclk->entries[v_index].v) { - vol_found = true; - if ((uint32_t)v_index < hwmgr->dyn_state.cac_leakage_table->count) { - *lo = hwmgr->dyn_state.cac_leakage_table->entries[v_index].Vddc * VOLTAGE_SCALE; - *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[v_index].Leakage) * VOLTAGE_SCALE; - } else { - pr_warn("Index from SCLK/VDDC Dependency Table exceeds the CAC Leakage Table index in second look up, using maximum index from CAC table."); - *lo = hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Vddc * VOLTAGE_SCALE; - *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Leakage * VOLTAGE_SCALE); - } - break; - } - } - - if (!vol_found) - pr_warn("Unable to get std_vddc from SCLK/VDDC Dependency Table, using vddc.\n"); - } - - return 0; -} - -static int iceland_populate_smc_voltage_table(struct pp_hwmgr *hwmgr, - pp_atomctrl_voltage_table_entry *tab, - SMU71_Discrete_VoltageLevel *smc_voltage_tab) -{ - int result; - - result = iceland_get_std_voltage_value_sidd(hwmgr, tab, - &smc_voltage_tab->StdVoltageHiSidd, - &smc_voltage_tab->StdVoltageLoSidd); - if (0 != result) { - smc_voltage_tab->StdVoltageHiSidd = tab->value * VOLTAGE_SCALE; - smc_voltage_tab->StdVoltageLoSidd = tab->value * VOLTAGE_SCALE; - } - - smc_voltage_tab->Voltage = PP_HOST_TO_SMC_US(tab->value * VOLTAGE_SCALE); - CONVERT_FROM_HOST_TO_SMC_US(smc_voltage_tab->StdVoltageHiSidd); - CONVERT_FROM_HOST_TO_SMC_US(smc_voltage_tab->StdVoltageHiSidd); - - return 0; -} - -static int iceland_populate_smc_vddc_table(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - unsigned int count; - int result; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - table->VddcLevelCount = data->vddc_voltage_table.count; - for (count = 0; count < table->VddcLevelCount; count++) { - result = iceland_populate_smc_voltage_table(hwmgr, - &(data->vddc_voltage_table.entries[count]), - &(table->VddcLevel[count])); - PP_ASSERT_WITH_CODE(0 == result, "do not populate SMC VDDC voltage table", return -EINVAL); - - /* GPIO voltage control */ - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->voltage_control) - table->VddcLevel[count].Smio |= data->vddc_voltage_table.entries[count].smio_low; - else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) - 
table->VddcLevel[count].Smio = 0; - } - - CONVERT_FROM_HOST_TO_SMC_UL(table->VddcLevelCount); - - return 0; -} - -static int iceland_populate_smc_vdd_ci_table(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t count; - int result; - - table->VddciLevelCount = data->vddci_voltage_table.count; - - for (count = 0; count < table->VddciLevelCount; count++) { - result = iceland_populate_smc_voltage_table(hwmgr, - &(data->vddci_voltage_table.entries[count]), - &(table->VddciLevel[count])); - PP_ASSERT_WITH_CODE(result == 0, "do not populate SMC VDDCI voltage table", return -EINVAL); - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) - table->VddciLevel[count].Smio |= data->vddci_voltage_table.entries[count].smio_low; - else - table->VddciLevel[count].Smio |= 0; - } - - CONVERT_FROM_HOST_TO_SMC_UL(table->VddciLevelCount); - - return 0; -} - -static int iceland_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t count; - int result; - - table->MvddLevelCount = data->mvdd_voltage_table.count; - - for (count = 0; count < table->VddciLevelCount; count++) { - result = iceland_populate_smc_voltage_table(hwmgr, - &(data->mvdd_voltage_table.entries[count]), - &table->MvddLevel[count]); - PP_ASSERT_WITH_CODE(result == 0, "do not populate SMC mvdd voltage table", return -EINVAL); - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) - table->MvddLevel[count].Smio |= data->mvdd_voltage_table.entries[count].smio_low; - else - table->MvddLevel[count].Smio |= 0; - } - - CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); - - return 0; -} - - -static int iceland_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - int result; - - result = iceland_populate_smc_vddc_table(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "can not populate VDDC voltage table to SMC", return -EINVAL); - - result = iceland_populate_smc_vdd_ci_table(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "can not populate VDDCI voltage table to SMC", return -EINVAL); - - result = iceland_populate_smc_mvdd_table(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "can not populate MVDD voltage table to SMC", return -EINVAL); - - return 0; -} - -static int iceland_populate_ulv_level(struct pp_hwmgr *hwmgr, - struct SMU71_Discrete_Ulv *state) -{ - uint32_t voltage_response_time, ulv_voltage; - int result; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - state->CcPwrDynRm = 0; - state->CcPwrDynRm1 = 0; - - result = pp_tables_get_response_times(hwmgr, &voltage_response_time, &ulv_voltage); - PP_ASSERT_WITH_CODE((0 == result), "can not get ULV voltage value", return result;); - - if (ulv_voltage == 0) { - data->ulv_supported = false; - return 0; - } - - if (data->voltage_control != SMU7_VOLTAGE_CONTROL_BY_SVID2) { - /* use minimum voltage if ulv voltage in pptable is bigger than minimum voltage */ - if (ulv_voltage > hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v) - state->VddcOffset = 0; - else - /* used in SMIO Mode. not implemented for now. this is backup only for CI. 
*/ - state->VddcOffset = (uint16_t)(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v - ulv_voltage); - } else { - /* use minimum voltage if ulv voltage in pptable is bigger than minimum voltage */ - if (ulv_voltage > hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v) - state->VddcOffsetVid = 0; - else /* used in SVI2 Mode */ - state->VddcOffsetVid = (uint8_t)( - (hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v - ulv_voltage) - * VOLTAGE_VID_OFFSET_SCALE2 - / VOLTAGE_VID_OFFSET_SCALE1); - } - state->VddcPhase = 1; - - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); - CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); - - return 0; -} - -static int iceland_populate_ulv_state(struct pp_hwmgr *hwmgr, - SMU71_Discrete_Ulv *ulv_level) -{ - return iceland_populate_ulv_level(hwmgr, ulv_level); -} - -static int iceland_populate_smc_link_level(struct pp_hwmgr *hwmgr, SMU71_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - uint32_t i; - - /* Index (dpm_table->pcie_speed_table.count) is reserved for PCIE boot level. */ - for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { - table->LinkLevel[i].PcieGenSpeed = - (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; - table->LinkLevel[i].PcieLaneCount = - (uint8_t)encode_pcie_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); - table->LinkLevel[i].EnabledForActivity = - 1; - table->LinkLevel[i].SPC = - (uint8_t)(data->pcie_spc_cap & 0xff); - table->LinkLevel[i].DownThreshold = - PP_HOST_TO_SMC_UL(5); - table->LinkLevel[i].UpThreshold = - PP_HOST_TO_SMC_UL(30); - } - - smu_data->smc_state_table.LinkLevelCount = - (uint8_t)dpm_table->pcie_speed_table.count; - data->dpm_level_enable_mask.pcie_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); - - return 0; -} - -/** - * Calculates the SCLK dividers using the provided engine clock - * - * @param hwmgr the address of the hardware manager - * @param engine_clock the engine clock to use to populate the structure - * @param sclk the SMC SCLK structure to be populated - */ -static int iceland_calculate_sclk_params(struct pp_hwmgr *hwmgr, - uint32_t engine_clock, SMU71_Discrete_GraphicsLevel *sclk) -{ - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - pp_atomctrl_clock_dividers_vi dividers; - uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; - uint32_t spll_func_cntl_3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; - uint32_t spll_func_cntl_4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; - uint32_t cg_spll_spread_spectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; - uint32_t cg_spll_spread_spectrum_2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; - uint32_t reference_clock; - uint32_t reference_divider; - uint32_t fbdiv; - int result; - - /* get the engine clock dividers for this clock value*/ - result = atomctrl_get_engine_pll_dividers_vi(hwmgr, engine_clock, &dividers); - - PP_ASSERT_WITH_CODE(result == 0, - "Error retrieving Engine Clock dividers from VBIOS.", return result); - - /* To get FBDIV we need to multiply this by 16384 and divide it by Fref.*/ - reference_clock = atomctrl_get_reference_clock(hwmgr); - - reference_divider = 1 + dividers.uc_pll_ref_div; - - /* low 14 bits is fraction and high 12 bits is divider*/ - fbdiv = 
dividers.ul_fb_div.ul_fb_divider & 0x3FFFFFF; - - /* SPLL_FUNC_CNTL setup*/ - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, - CG_SPLL_FUNC_CNTL, SPLL_REF_DIV, dividers.uc_pll_ref_div); - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, - CG_SPLL_FUNC_CNTL, SPLL_PDIV_A, dividers.uc_pll_post_div); - - /* SPLL_FUNC_CNTL_3 setup*/ - spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, - CG_SPLL_FUNC_CNTL_3, SPLL_FB_DIV, fbdiv); - - /* set to use fractional accumulation*/ - spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, - CG_SPLL_FUNC_CNTL_3, SPLL_DITHEN, 1); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_EngineSpreadSpectrumSupport)) { - pp_atomctrl_internal_ss_info ss_info; - - uint32_t vcoFreq = engine_clock * dividers.uc_pll_post_div; - if (0 == atomctrl_get_engine_clock_spread_spectrum(hwmgr, vcoFreq, &ss_info)) { - /* - * ss_info.speed_spectrum_percentage -- in unit of 0.01% - * ss_info.speed_spectrum_rate -- in unit of khz - */ - /* clks = reference_clock * 10 / (REFDIV + 1) / speed_spectrum_rate / 2 */ - uint32_t clkS = reference_clock * 5 / (reference_divider * ss_info.speed_spectrum_rate); - - /* clkv = 2 * D * fbdiv / NS */ - uint32_t clkV = 4 * ss_info.speed_spectrum_percentage * fbdiv / (clkS * 10000); - - cg_spll_spread_spectrum = - PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, CLKS, clkS); - cg_spll_spread_spectrum = - PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, SSEN, 1); - cg_spll_spread_spectrum_2 = - PHM_SET_FIELD(cg_spll_spread_spectrum_2, CG_SPLL_SPREAD_SPECTRUM_2, CLKV, clkV); - } - } - - sclk->SclkFrequency = engine_clock; - sclk->CgSpllFuncCntl3 = spll_func_cntl_3; - sclk->CgSpllFuncCntl4 = spll_func_cntl_4; - sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; - sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; - sclk->SclkDid = (uint8_t)dividers.pll_post_divider; - - return 0; -} - -static int iceland_populate_phase_value_based_on_sclk(struct pp_hwmgr *hwmgr, - const struct phm_phase_shedding_limits_table *pl, - uint32_t sclk, uint32_t *p_shed) -{ - unsigned int i; - - /* use the minimum phase shedding */ - *p_shed = 1; - - for (i = 0; i < pl->count; i++) { - if (sclk < pl->entries[i].Sclk) { - *p_shed = i; - break; - } - } - return 0; -} - -/** - * Populates single SMC SCLK structure using the provided engine clock - * - * @param hwmgr the address of the hardware manager - * @param engine_clock the engine clock to use to populate the structure - * @param sclk the SMC SCLK structure to be populated - */ -static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr, - uint32_t engine_clock, - uint16_t sclk_activity_level_threshold, - SMU71_Discrete_GraphicsLevel *graphic_level) -{ - int result; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - result = iceland_calculate_sclk_params(hwmgr, engine_clock, graphic_level); - - /* populate graphics levels*/ - result = iceland_get_dependency_volt_by_clk(hwmgr, - hwmgr->dyn_state.vddc_dependency_on_sclk, engine_clock, - &graphic_level->MinVddc); - PP_ASSERT_WITH_CODE((0 == result), - "can not find VDDC voltage value for VDDC \ - engine clock dependency table", return result); - - /* SCLK frequency in units of 10KHz*/ - graphic_level->SclkFrequency = engine_clock; - graphic_level->MinVddcPhases = 1; - - if (data->vddc_phase_shed_control) - iceland_populate_phase_value_based_on_sclk(hwmgr, - hwmgr->dyn_state.vddc_phase_shed_limits_table, - engine_clock, - &graphic_level->MinVddcPhases); - - /* Indicates maximum activity 
level for this performance level. 50% for now*/ - graphic_level->ActivityLevel = sclk_activity_level_threshold; - - graphic_level->CcPwrDynRm = 0; - graphic_level->CcPwrDynRm1 = 0; - /* this level can be used if activity is high enough.*/ - graphic_level->EnabledForActivity = 0; - /* this level can be used for throttling.*/ - graphic_level->EnabledForThrottle = 1; - graphic_level->UpHyst = 0; - graphic_level->DownHyst = 100; - graphic_level->VoltageDownHyst = 0; - graphic_level->PowerThrottle = 0; - - data->display_timing.min_clock_in_sr = - hwmgr->display_config.min_core_set_clock_in_sr; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkDeepSleep)) - graphic_level->DeepSleepDivId = - smu7_get_sleep_divider_id_from_clock(engine_clock, - data->display_timing.min_clock_in_sr); - - /* Default to slow, highest DPM level will be set to PPSMC_DISPLAY_WATERMARK_LOW later.*/ - graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - if (0 == result) { - graphic_level->MinVddc = PP_HOST_TO_SMC_UL(graphic_level->MinVddc * VOLTAGE_SCALE); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVddcPhases); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_US(graphic_level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl3); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl4); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum2); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm1); - } - - return result; -} - -/** - * Populates all SMC SCLK levels' structure based on the trimmed allowed dpm engine clock states - * - * @param hwmgr the address of the hardware manager - */ -int iceland_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - uint32_t level_array_adress = smu_data->smu7_data.dpm_table_start + - offsetof(SMU71_Discrete_DpmTable, GraphicsLevel); - - uint32_t level_array_size = sizeof(SMU71_Discrete_GraphicsLevel) * - SMU71_MAX_LEVELS_GRAPHICS; - - SMU71_Discrete_GraphicsLevel *levels = smu_data->smc_state_table.GraphicsLevel; - - uint32_t i; - uint8_t highest_pcie_level_enabled = 0; - uint8_t lowest_pcie_level_enabled = 0, mid_pcie_level_enabled = 0; - uint8_t count = 0; - int result = 0; - - memset(levels, 0x00, level_array_size); - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - result = iceland_populate_single_graphic_level(hwmgr, - dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], - &(smu_data->smc_state_table.GraphicsLevel[i])); - if (result != 0) - return result; - - /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ - if (i > 1) - smu_data->smc_state_table.GraphicsLevel[i].DeepSleepDivId = 0; - } - - /* Only enable level 0 for now. 
*/ - smu_data->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1; - - /* set highest level watermark to high */ - if (dpm_table->sclk_table.count > 1) - smu_data->smc_state_table.GraphicsLevel[dpm_table->sclk_table.count-1].DisplayWatermark = - PPSMC_DISPLAY_WATERMARK_HIGH; - - smu_data->smc_state_table.GraphicsDpmLevelCount = - (uint8_t)dpm_table->sclk_table.count; - data->dpm_level_enable_mask.sclk_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); - - while ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << (highest_pcie_level_enabled + 1))) != 0) { - highest_pcie_level_enabled++; - } - - while ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << lowest_pcie_level_enabled)) == 0) { - lowest_pcie_level_enabled++; - } - - while ((count < highest_pcie_level_enabled) && - ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) { - count++; - } - - mid_pcie_level_enabled = (lowest_pcie_level_enabled+1+count) < highest_pcie_level_enabled ? - (lowest_pcie_level_enabled+1+count) : highest_pcie_level_enabled; - - - /* set pcieDpmLevel to highest_pcie_level_enabled*/ - for (i = 2; i < dpm_table->sclk_table.count; i++) { - smu_data->smc_state_table.GraphicsLevel[i].pcieDpmLevel = highest_pcie_level_enabled; - } - - /* set pcieDpmLevel to lowest_pcie_level_enabled*/ - smu_data->smc_state_table.GraphicsLevel[0].pcieDpmLevel = lowest_pcie_level_enabled; - - /* set pcieDpmLevel to mid_pcie_level_enabled*/ - smu_data->smc_state_table.GraphicsLevel[1].pcieDpmLevel = mid_pcie_level_enabled; - - /* level count will send to smc once at init smc table and never change*/ - result = smu7_copy_bytes_to_smc(hwmgr, level_array_adress, - (uint8_t *)levels, (uint32_t)level_array_size, - SMC_RAM_END); - - return result; -} - -/** - * Populates the SMC MCLK structure using the provided memory clock - * - * @param hwmgr the address of the hardware manager - * @param memory_clock the memory clock to use to populate the structure - * @param sclk the SMC SCLK structure to be populated - */ -static int iceland_calculate_mclk_params( - struct pp_hwmgr *hwmgr, - uint32_t memory_clock, - SMU71_Discrete_MemoryLevel *mclk, - bool strobe_mode, - bool dllStateOn - ) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; - uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; - uint32_t mpll_ad_func_cntl = data->clock_registers.vMPLL_AD_FUNC_CNTL; - uint32_t mpll_dq_func_cntl = data->clock_registers.vMPLL_DQ_FUNC_CNTL; - uint32_t mpll_func_cntl = data->clock_registers.vMPLL_FUNC_CNTL; - uint32_t mpll_func_cntl_1 = data->clock_registers.vMPLL_FUNC_CNTL_1; - uint32_t mpll_func_cntl_2 = data->clock_registers.vMPLL_FUNC_CNTL_2; - uint32_t mpll_ss1 = data->clock_registers.vMPLL_SS1; - uint32_t mpll_ss2 = data->clock_registers.vMPLL_SS2; - - pp_atomctrl_memory_clock_param mpll_param; - int result; - - result = atomctrl_get_memory_pll_dividers_si(hwmgr, - memory_clock, &mpll_param, strobe_mode); - PP_ASSERT_WITH_CODE(0 == result, - "Error retrieving Memory Clock Parameters from VBIOS.", return result); - - /* MPLL_FUNC_CNTL setup*/ - mpll_func_cntl = PHM_SET_FIELD(mpll_func_cntl, MPLL_FUNC_CNTL, BWCTRL, mpll_param.bw_ctrl); - - /* MPLL_FUNC_CNTL_1 setup*/ - mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, - MPLL_FUNC_CNTL_1, CLKF, mpll_param.mpll_fb_divider.cl_kf); - mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, - MPLL_FUNC_CNTL_1, 
CLKFRAC, mpll_param.mpll_fb_divider.clk_frac); - mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, - MPLL_FUNC_CNTL_1, VCO_MODE, mpll_param.vco_mode); - - /* MPLL_AD_FUNC_CNTL setup*/ - mpll_ad_func_cntl = PHM_SET_FIELD(mpll_ad_func_cntl, - MPLL_AD_FUNC_CNTL, YCLK_POST_DIV, mpll_param.mpll_post_divider); - - if (data->is_memory_gddr5) { - /* MPLL_DQ_FUNC_CNTL setup*/ - mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, - MPLL_DQ_FUNC_CNTL, YCLK_SEL, mpll_param.yclk_sel); - mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, - MPLL_DQ_FUNC_CNTL, YCLK_POST_DIV, mpll_param.mpll_post_divider); - } - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MemorySpreadSpectrumSupport)) { - /* - ************************************ - Fref = Reference Frequency - NF = Feedback divider ratio - NR = Reference divider ratio - Fnom = Nominal VCO output frequency = Fref * NF / NR - Fs = Spreading Rate - D = Percentage down-spread / 2 - Fint = Reference input frequency to PFD = Fref / NR - NS = Spreading rate divider ratio = int(Fint / (2 * Fs)) - CLKS = NS - 1 = ISS_STEP_NUM[11:0] - NV = D * Fs / Fnom * 4 * ((Fnom/Fref * NR) ^ 2) - CLKV = 65536 * NV = ISS_STEP_SIZE[25:0] - ************************************* - */ - pp_atomctrl_internal_ss_info ss_info; - uint32_t freq_nom; - uint32_t tmp; - uint32_t reference_clock = atomctrl_get_mpll_reference_clock(hwmgr); - - /* for GDDR5 for all modes and DDR3 */ - if (1 == mpll_param.qdr) - freq_nom = memory_clock * 4 * (1 << mpll_param.mpll_post_divider); - else - freq_nom = memory_clock * 2 * (1 << mpll_param.mpll_post_divider); - - /* tmp = (freq_nom / reference_clock * reference_divider) ^ 2 Note: S.I. reference_divider = 1*/ - tmp = (freq_nom / reference_clock); - tmp = tmp * tmp; - - if (0 == atomctrl_get_memory_clock_spread_spectrum(hwmgr, freq_nom, &ss_info)) { - /* ss_info.speed_spectrum_percentage -- in unit of 0.01% */ - /* ss.Info.speed_spectrum_rate -- in unit of khz */ - /* CLKS = reference_clock / (2 * speed_spectrum_rate * reference_divider) * 10 */ - /* = reference_clock * 5 / speed_spectrum_rate */ - uint32_t clks = reference_clock * 5 / ss_info.speed_spectrum_rate; - - /* CLKV = 65536 * speed_spectrum_percentage / 2 * spreadSpecrumRate / freq_nom * 4 / 100000 * ((freq_nom / reference_clock) ^ 2) */ - /* = 131 * speed_spectrum_percentage * speed_spectrum_rate / 100 * ((freq_nom / reference_clock) ^ 2) / freq_nom */ - uint32_t clkv = - (uint32_t)((((131 * ss_info.speed_spectrum_percentage * - ss_info.speed_spectrum_rate) / 100) * tmp) / freq_nom); - - mpll_ss1 = PHM_SET_FIELD(mpll_ss1, MPLL_SS1, CLKV, clkv); - mpll_ss2 = PHM_SET_FIELD(mpll_ss2, MPLL_SS2, CLKS, clks); - } - } - - /* MCLK_PWRMGT_CNTL setup */ - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, DLL_SPEED, mpll_param.dll_speed); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK0_PDNB, dllStateOn); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK1_PDNB, dllStateOn); - - - /* Save the result data to outpupt memory level structure */ - mclk->MclkFrequency = memory_clock; - mclk->MpllFuncCntl = mpll_func_cntl; - mclk->MpllFuncCntl_1 = mpll_func_cntl_1; - mclk->MpllFuncCntl_2 = mpll_func_cntl_2; - mclk->MpllAdFuncCntl = mpll_ad_func_cntl; - mclk->MpllDqFuncCntl = mpll_dq_func_cntl; - mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; - mclk->DllCntl = dll_cntl; - mclk->MpllSs1 = mpll_ss1; - mclk->MpllSs2 = mpll_ss2; - - return 0; -} - -static uint8_t 
iceland_get_mclk_frequency_ratio(uint32_t memory_clock, - bool strobe_mode) -{ - uint8_t mc_para_index; - - if (strobe_mode) { - if (memory_clock < 12500) { - mc_para_index = 0x00; - } else if (memory_clock > 47500) { - mc_para_index = 0x0f; - } else { - mc_para_index = (uint8_t)((memory_clock - 10000) / 2500); - } - } else { - if (memory_clock < 65000) { - mc_para_index = 0x00; - } else if (memory_clock > 135000) { - mc_para_index = 0x0f; - } else { - mc_para_index = (uint8_t)((memory_clock - 60000) / 5000); - } - } - - return mc_para_index; -} - -static uint8_t iceland_get_ddr3_mclk_frequency_ratio(uint32_t memory_clock) -{ - uint8_t mc_para_index; - - if (memory_clock < 10000) { - mc_para_index = 0; - } else if (memory_clock >= 80000) { - mc_para_index = 0x0f; - } else { - mc_para_index = (uint8_t)((memory_clock - 10000) / 5000 + 1); - } - - return mc_para_index; -} - -static int iceland_populate_phase_value_based_on_mclk(struct pp_hwmgr *hwmgr, const struct phm_phase_shedding_limits_table *pl, - uint32_t memory_clock, uint32_t *p_shed) -{ - unsigned int i; - - *p_shed = 1; - - for (i = 0; i < pl->count; i++) { - if (memory_clock < pl->entries[i].Mclk) { - *p_shed = i; - break; - } - } - - return 0; -} - -static int iceland_populate_single_memory_level( - struct pp_hwmgr *hwmgr, - uint32_t memory_clock, - SMU71_Discrete_MemoryLevel *memory_level - ) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - int result = 0; - bool dll_state_on; - struct cgs_display_info info = {0}; - uint32_t mclk_edc_wr_enable_threshold = 40000; - uint32_t mclk_edc_enable_threshold = 40000; - uint32_t mclk_strobe_mode_threshold = 40000; - - if (hwmgr->dyn_state.vddc_dependency_on_mclk != NULL) { - result = iceland_get_dependency_volt_by_clk(hwmgr, - hwmgr->dyn_state.vddc_dependency_on_mclk, memory_clock, &memory_level->MinVddc); - PP_ASSERT_WITH_CODE((0 == result), - "can not find MinVddc voltage value from memory VDDC voltage dependency table", return result); - } - - if (data->vddci_control == SMU7_VOLTAGE_CONTROL_NONE) { - memory_level->MinVddci = memory_level->MinVddc; - } else if (NULL != hwmgr->dyn_state.vddci_dependency_on_mclk) { - result = iceland_get_dependency_volt_by_clk(hwmgr, - hwmgr->dyn_state.vddci_dependency_on_mclk, - memory_clock, - &memory_level->MinVddci); - PP_ASSERT_WITH_CODE((0 == result), - "can not find MinVddci voltage value from memory VDDCI voltage dependency table", return result); - } - - memory_level->MinVddcPhases = 1; - - if (data->vddc_phase_shed_control) { - iceland_populate_phase_value_based_on_mclk(hwmgr, hwmgr->dyn_state.vddc_phase_shed_limits_table, - memory_clock, &memory_level->MinVddcPhases); - } - - memory_level->EnabledForThrottle = 1; - memory_level->EnabledForActivity = 0; - memory_level->UpHyst = 0; - memory_level->DownHyst = 100; - memory_level->VoltageDownHyst = 0; - - /* Indicates maximum activity level for this performance level.*/ - memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; - memory_level->StutterEnable = 0; - memory_level->StrobeEnable = 0; - memory_level->EdcReadEnable = 0; - memory_level->EdcWriteEnable = 0; - memory_level->RttEnable = 0; - - /* default set to low watermark. 
Highest level will be set to high later.*/ - memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - cgs_get_active_displays_info(hwmgr->device, &info); - data->display_timing.num_existing_displays = info.display_count; - - /* stutter mode not support on iceland */ - - /* decide strobe mode*/ - memory_level->StrobeEnable = (mclk_strobe_mode_threshold != 0) && - (memory_clock <= mclk_strobe_mode_threshold); - - /* decide EDC mode and memory clock ratio*/ - if (data->is_memory_gddr5) { - memory_level->StrobeRatio = iceland_get_mclk_frequency_ratio(memory_clock, - memory_level->StrobeEnable); - - if ((mclk_edc_enable_threshold != 0) && - (memory_clock > mclk_edc_enable_threshold)) { - memory_level->EdcReadEnable = 1; - } - - if ((mclk_edc_wr_enable_threshold != 0) && - (memory_clock > mclk_edc_wr_enable_threshold)) { - memory_level->EdcWriteEnable = 1; - } - - if (memory_level->StrobeEnable) { - if (iceland_get_mclk_frequency_ratio(memory_clock, 1) >= - ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC7) >> 16) & 0xf)) - dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 1 : 0; - else - dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC6) >> 1) & 0x1) ? 1 : 0; - } else - dll_state_on = data->dll_default_on; - } else { - memory_level->StrobeRatio = - iceland_get_ddr3_mclk_frequency_ratio(memory_clock); - dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 1 : 0; - } - - result = iceland_calculate_mclk_params(hwmgr, - memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on); - - if (0 == result) { - memory_level->MinVddc = PP_HOST_TO_SMC_UL(memory_level->MinVddc * VOLTAGE_SCALE); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MinVddcPhases); - memory_level->MinVddci = PP_HOST_TO_SMC_UL(memory_level->MinVddci * VOLTAGE_SCALE); - memory_level->MinMvdd = PP_HOST_TO_SMC_UL(memory_level->MinMvdd * VOLTAGE_SCALE); - /* MCLK frequency in units of 10KHz*/ - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkFrequency); - /* Indicates maximum activity level for this performance level.*/ - CONVERT_FROM_HOST_TO_SMC_US(memory_level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_1); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_2); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllAdFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllDqFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkPwrmgtCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->DllCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs1); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs2); - } - - return result; -} - -/** - * Populates all SMC MCLK levels' structure based on the trimmed allowed dpm memory clock states - * - * @param hwmgr the address of the hardware manager - */ - -int iceland_populate_all_memory_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - int result; - - /* populate MCLK dpm table to SMU7 */ - uint32_t level_array_adress = smu_data->smu7_data.dpm_table_start + offsetof(SMU71_Discrete_DpmTable, MemoryLevel); - uint32_t level_array_size = sizeof(SMU71_Discrete_MemoryLevel) * SMU71_MAX_LEVELS_MEMORY; - SMU71_Discrete_MemoryLevel *levels = smu_data->smc_state_table.MemoryLevel; - uint32_t i; - - memset(levels, 0x00, level_array_size); - - 
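As an illustrative aside (not part of the removed driver code), the strobe/EDC decisions above key off the MCLK frequency-ratio helpers shown earlier (iceland_get_mclk_frequency_ratio and iceland_get_ddr3_mclk_frequency_ratio). A minimal standalone C sketch of that clamping arithmetic follows; clocks are in 10 kHz units and the sample values are made up:

/*
 * Standalone restatement of the MCLK frequency-ratio helpers above.
 * Clocks are in 10 kHz units (40000 == 400 MHz); the 4-bit result is
 * clamped to the 0x0..0xf range expected in StrobeRatio.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static uint8_t gddr5_mclk_ratio(uint32_t memory_clock, bool strobe_mode)
{
	if (strobe_mode) {
		if (memory_clock < 12500)
			return 0x00;
		if (memory_clock > 47500)
			return 0x0f;
		return (uint8_t)((memory_clock - 10000) / 2500);
	}
	if (memory_clock < 65000)
		return 0x00;
	if (memory_clock > 135000)
		return 0x0f;
	return (uint8_t)((memory_clock - 60000) / 5000);
}

static uint8_t ddr3_mclk_ratio(uint32_t memory_clock)
{
	if (memory_clock < 10000)
		return 0;
	if (memory_clock >= 80000)
		return 0x0f;
	return (uint8_t)((memory_clock - 10000) / 5000 + 1);
}

int main(void)
{
	/* 400 MHz GDDR5 in strobe mode: (40000 - 10000) / 2500 = 12 -> 0x0c */
	printf("gddr5 strobe  400 MHz -> 0x%02x\n", gddr5_mclk_ratio(40000, true));
	/* 1000 MHz GDDR5, no strobe: (100000 - 60000) / 5000 = 8 -> 0x08 */
	printf("gddr5 normal 1000 MHz -> 0x%02x\n", gddr5_mclk_ratio(100000, false));
	/* 600 MHz DDR3: (60000 - 10000) / 5000 + 1 = 11 -> 0x0b */
	printf("ddr3          600 MHz -> 0x%02x\n", ddr3_mclk_ratio(60000));
	return 0;
}

Values below the lower threshold clamp to 0 and values above the upper threshold clamp to 0x0f, which is the index range the memory-level code above stores in StrobeRatio.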
for (i = 0; i < dpm_table->mclk_table.count; i++) { - PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), - "can not populate memory level as memory clock is zero", return -EINVAL); - result = iceland_populate_single_memory_level(hwmgr, dpm_table->mclk_table.dpm_levels[i].value, - &(smu_data->smc_state_table.MemoryLevel[i])); - if (0 != result) { - return result; - } - } - - /* Only enable level 0 for now.*/ - smu_data->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; - - /* - * in order to prevent MC activity from stutter mode to push DPM up. - * the UVD change complements this by putting the MCLK in a higher state - * by default such that we are not effected by up threshold or and MCLK DPM latency. - */ - smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); - - smu_data->smc_state_table.MemoryDpmLevelCount = (uint8_t)dpm_table->mclk_table.count; - data->dpm_level_enable_mask.mclk_dpm_enable_mask = phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); - /* set highest level watermark to high*/ - smu_data->smc_state_table.MemoryLevel[dpm_table->mclk_table.count-1].DisplayWatermark = PPSMC_DISPLAY_WATERMARK_HIGH; - - /* level count will send to smc once at init smc table and never change*/ - result = smu7_copy_bytes_to_smc(hwmgr, - level_array_adress, (uint8_t *)levels, (uint32_t)level_array_size, - SMC_RAM_END); - - return result; -} - -static int iceland_populate_mvdd_value(struct pp_hwmgr *hwmgr, uint32_t mclk, - SMU71_Discrete_VoltageLevel *voltage) -{ - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - uint32_t i = 0; - - if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { - /* find mvdd value which clock is more than request */ - for (i = 0; i < hwmgr->dyn_state.mvdd_dependency_on_mclk->count; i++) { - if (mclk <= hwmgr->dyn_state.mvdd_dependency_on_mclk->entries[i].clk) { - /* Always round to higher voltage. */ - voltage->Voltage = data->mvdd_voltage_table.entries[i].value; - break; - } - } - - PP_ASSERT_WITH_CODE(i < hwmgr->dyn_state.mvdd_dependency_on_mclk->count, - "MVDD Voltage is outside the supported range.", return -EINVAL); - - } else { - return -EINVAL; - } - - return 0; -} - -static int iceland_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - int result = 0; - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct pp_atomctrl_clock_dividers_vi dividers; - uint32_t vddc_phase_shed_control = 0; - - SMU71_Discrete_VoltageLevel voltage_level; - uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; - uint32_t spll_func_cntl_2 = data->clock_registers.vCG_SPLL_FUNC_CNTL_2; - uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; - uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; - - - /* The ACPI state should not do DPM on DC (or ever).*/ - table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - - if (data->acpi_vddc) - table->ACPILevel.MinVddc = PP_HOST_TO_SMC_UL(data->acpi_vddc * VOLTAGE_SCALE); - else - table->ACPILevel.MinVddc = PP_HOST_TO_SMC_UL(data->min_vddc_in_pptable * VOLTAGE_SCALE); - - table->ACPILevel.MinVddcPhases = vddc_phase_shed_control ? 
0 : 1; - /* assign zero for now*/ - table->ACPILevel.SclkFrequency = atomctrl_get_reference_clock(hwmgr); - - /* get the engine clock dividers for this clock value*/ - result = atomctrl_get_engine_pll_dividers_vi(hwmgr, - table->ACPILevel.SclkFrequency, &dividers); - - PP_ASSERT_WITH_CODE(result == 0, - "Error retrieving Engine Clock dividers from VBIOS.", return result); - - /* divider ID for required SCLK*/ - table->ACPILevel.SclkDid = (uint8_t)dividers.pll_post_divider; - table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - table->ACPILevel.DeepSleepDivId = 0; - - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, - CG_SPLL_FUNC_CNTL, SPLL_PWRON, 0); - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, - CG_SPLL_FUNC_CNTL, SPLL_RESET, 1); - spll_func_cntl_2 = PHM_SET_FIELD(spll_func_cntl_2, - CG_SPLL_FUNC_CNTL_2, SCLK_MUX_SEL, 4); - - table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; - table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; - table->ACPILevel.CgSpllFuncCntl3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; - table->ACPILevel.CgSpllFuncCntl4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; - table->ACPILevel.SpllSpreadSpectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; - table->ACPILevel.SpllSpreadSpectrum2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; - table->ACPILevel.CcPwrDynRm = 0; - table->ACPILevel.CcPwrDynRm1 = 0; - - - /* For various features to be enabled/disabled while this level is active.*/ - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); - /* SCLK frequency in units of 10KHz*/ - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl2); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl3); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl4); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum2); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); - - /* table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases;*/ - table->MemoryACPILevel.MinVddc = table->ACPILevel.MinVddc; - table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases; - - if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) - table->MemoryACPILevel.MinVddci = table->MemoryACPILevel.MinVddc; - else { - if (data->acpi_vddci != 0) - table->MemoryACPILevel.MinVddci = PP_HOST_TO_SMC_UL(data->acpi_vddci * VOLTAGE_SCALE); - else - table->MemoryACPILevel.MinVddci = PP_HOST_TO_SMC_UL(data->min_vddci_in_pptable * VOLTAGE_SCALE); - } - - if (0 == iceland_populate_mvdd_value(hwmgr, 0, &voltage_level)) - table->MemoryACPILevel.MinMvdd = - PP_HOST_TO_SMC_UL(voltage_level.Voltage * VOLTAGE_SCALE); - else - table->MemoryACPILevel.MinMvdd = 0; - - /* Force reset on DLL*/ - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK0_RESET, 0x1); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK1_RESET, 0x1); - - /* Disable DLL in ACPIState*/ - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK0_PDNB, 0); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK1_PDNB, 0); - - /* Enable DLL bypass signal*/ - dll_cntl = PHM_SET_FIELD(dll_cntl, - DLL_CNTL, MRDCK0_BYPASS, 0); - dll_cntl = PHM_SET_FIELD(dll_cntl, - DLL_CNTL, MRDCK1_BYPASS, 0); - - table->MemoryACPILevel.DllCntl = - 
PP_HOST_TO_SMC_UL(dll_cntl); - table->MemoryACPILevel.MclkPwrmgtCntl = - PP_HOST_TO_SMC_UL(mclk_pwrmgt_cntl); - table->MemoryACPILevel.MpllAdFuncCntl = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_AD_FUNC_CNTL); - table->MemoryACPILevel.MpllDqFuncCntl = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_DQ_FUNC_CNTL); - table->MemoryACPILevel.MpllFuncCntl = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL); - table->MemoryACPILevel.MpllFuncCntl_1 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_1); - table->MemoryACPILevel.MpllFuncCntl_2 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_2); - table->MemoryACPILevel.MpllSs1 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS1); - table->MemoryACPILevel.MpllSs2 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS2); - - table->MemoryACPILevel.EnabledForThrottle = 0; - table->MemoryACPILevel.EnabledForActivity = 0; - table->MemoryACPILevel.UpHyst = 0; - table->MemoryACPILevel.DownHyst = 100; - table->MemoryACPILevel.VoltageDownHyst = 0; - /* Indicates maximum activity level for this performance level.*/ - table->MemoryACPILevel.ActivityLevel = PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); - - table->MemoryACPILevel.StutterEnable = 0; - table->MemoryACPILevel.StrobeEnable = 0; - table->MemoryACPILevel.EdcReadEnable = 0; - table->MemoryACPILevel.EdcWriteEnable = 0; - table->MemoryACPILevel.RttEnable = 0; - - return result; -} - -static int iceland_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - return 0; -} - -static int iceland_populate_smc_vce_level(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - return 0; -} - -static int iceland_populate_smc_acp_level(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - return 0; -} - -static int iceland_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - return 0; -} - -static int iceland_populate_memory_timing_parameters( - struct pp_hwmgr *hwmgr, - uint32_t engine_clock, - uint32_t memory_clock, - struct SMU71_Discrete_MCArbDramTimingTableEntry *arb_regs - ) -{ - uint32_t dramTiming; - uint32_t dramTiming2; - uint32_t burstTime; - int result; - - result = atomctrl_set_engine_dram_timings_rv770(hwmgr, - engine_clock, memory_clock); - - PP_ASSERT_WITH_CODE(result == 0, - "Error calling VBIOS to set DRAM_TIMING.", return result); - - dramTiming = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); - dramTiming2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); - burstTime = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); - - arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dramTiming); - arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dramTiming2); - arb_regs->McArbBurstTime = (uint8_t)burstTime; - - return 0; -} - -/** - * Setup parameters for the MC ARB. - * - * @param hwmgr the address of the powerplay hardware manager. - * @return always 0 - * This function is to be called from the SetPowerState table. 
- */ -static int iceland_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - int result = 0; - SMU71_Discrete_MCArbDramTimingTable arb_regs; - uint32_t i, j; - - memset(&arb_regs, 0x00, sizeof(SMU71_Discrete_MCArbDramTimingTable)); - - for (i = 0; i < data->dpm_table.sclk_table.count; i++) { - for (j = 0; j < data->dpm_table.mclk_table.count; j++) { - result = iceland_populate_memory_timing_parameters - (hwmgr, data->dpm_table.sclk_table.dpm_levels[i].value, - data->dpm_table.mclk_table.dpm_levels[j].value, - &arb_regs.entries[i][j]); - - if (0 != result) { - break; - } - } - } - - if (0 == result) { - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.arb_table_start, - (uint8_t *)&arb_regs, - sizeof(SMU71_Discrete_MCArbDramTimingTable), - SMC_RAM_END - ); - } - - return result; -} - -static int iceland_populate_smc_boot_level(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - int result = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - table->GraphicsBootLevel = 0; - table->MemoryBootLevel = 0; - - /* find boot level from dpm table*/ - result = phm_find_boot_level(&(data->dpm_table.sclk_table), - data->vbios_boot_state.sclk_bootup_value, - (uint32_t *)&(smu_data->smc_state_table.GraphicsBootLevel)); - - if (0 != result) { - smu_data->smc_state_table.GraphicsBootLevel = 0; - pr_err("VBIOS did not find boot engine clock value \ - in dependency table. Using Graphics DPM level 0!"); - result = 0; - } - - result = phm_find_boot_level(&(data->dpm_table.mclk_table), - data->vbios_boot_state.mclk_bootup_value, - (uint32_t *)&(smu_data->smc_state_table.MemoryBootLevel)); - - if (0 != result) { - smu_data->smc_state_table.MemoryBootLevel = 0; - pr_err("VBIOS did not find boot engine clock value \ - in dependency table. 
Using Memory DPM level 0!"); - result = 0; - } - - table->BootVddc = data->vbios_boot_state.vddc_bootup_value; - if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) - table->BootVddci = table->BootVddc; - else - table->BootVddci = data->vbios_boot_state.vddci_bootup_value; - - table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value; - - return result; -} - -static int iceland_populate_mc_reg_address(struct pp_hwmgr *hwmgr, - SMU71_Discrete_MCRegisters *mc_reg_table) -{ - const struct iceland_smumgr *smu_data = (struct iceland_smumgr *)hwmgr->smu_backend; - - uint32_t i, j; - - for (i = 0, j = 0; j < smu_data->mc_reg_table.last; j++) { - if (smu_data->mc_reg_table.validflag & 1<<j) { - PP_ASSERT_WITH_CODE(i < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE, - "Index of mc_reg_table->address[] array out of boundary", return -EINVAL); - mc_reg_table->address[i].s0 = - PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s0); - mc_reg_table->address[i].s1 = - PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s1); - i++; - } - } - - mc_reg_table->last = (uint8_t)i; - - return 0; -} - -/*convert register values from driver to SMC format */ -static void iceland_convert_mc_registers( - const struct iceland_mc_reg_entry *entry, - SMU71_Discrete_MCRegisterSet *data, - uint32_t num_entries, uint32_t valid_flag) -{ - uint32_t i, j; - - for (i = 0, j = 0; j < num_entries; j++) { - if (valid_flag & 1<<j) { - data->value[i] = PP_HOST_TO_SMC_UL(entry->mc_data[j]); - i++; - } - } -} - -static int iceland_convert_mc_reg_table_entry_to_smc(struct pp_hwmgr *hwmgr, - const uint32_t memory_clock, - SMU71_Discrete_MCRegisterSet *mc_reg_table_data - ) -{ - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - uint32_t i = 0; - - for (i = 0; i < smu_data->mc_reg_table.num_entries; i++) { - if (memory_clock <= - smu_data->mc_reg_table.mc_reg_table_entry[i].mclk_max) { - break; - } - } - - if ((i == smu_data->mc_reg_table.num_entries) && (i > 0)) - --i; - - iceland_convert_mc_registers(&smu_data->mc_reg_table.mc_reg_table_entry[i], - mc_reg_table_data, smu_data->mc_reg_table.last, - smu_data->mc_reg_table.validflag); - - return 0; -} - -static int iceland_convert_mc_reg_table_to_smc(struct pp_hwmgr *hwmgr, - SMU71_Discrete_MCRegisters *mc_regs) -{ - int result = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - int res; - uint32_t i; - - for (i = 0; i < data->dpm_table.mclk_table.count; i++) { - res = iceland_convert_mc_reg_table_entry_to_smc( - hwmgr, - data->dpm_table.mclk_table.dpm_levels[i].value, - &mc_regs->data[i] - ); - - if (0 != res) - result = res; - } - - return result; -} - -static int iceland_update_and_upload_mc_reg_table(struct pp_hwmgr *hwmgr) -{ - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t address; - int32_t result; - - if (0 == (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) - return 0; - - - memset(&smu_data->mc_regs, 0, sizeof(SMU71_Discrete_MCRegisters)); - - result = iceland_convert_mc_reg_table_to_smc(hwmgr, &(smu_data->mc_regs)); - - if (result != 0) - return result; - - - address = smu_data->smu7_data.mc_reg_table_start + (uint32_t)offsetof(SMU71_Discrete_MCRegisters, data[0]); - - return smu7_copy_bytes_to_smc(hwmgr, address, - (uint8_t *)&smu_data->mc_regs.data[0], - sizeof(SMU71_Discrete_MCRegisterSet) * data->dpm_table.mclk_table.count, - SMC_RAM_END); -} - -static int 
iceland_populate_initial_mc_reg_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - - memset(&smu_data->mc_regs, 0x00, sizeof(SMU71_Discrete_MCRegisters)); - result = iceland_populate_mc_reg_address(hwmgr, &(smu_data->mc_regs)); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize MCRegTable for the MC register addresses!", return result;); - - result = iceland_convert_mc_reg_table_to_smc(hwmgr, &smu_data->mc_regs); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize MCRegTable for driver state!", return result;); - - return smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.mc_reg_table_start, - (uint8_t *)&smu_data->mc_regs, sizeof(SMU71_Discrete_MCRegisters), SMC_RAM_END); -} - -static int iceland_populate_smc_initial_state(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - uint8_t count, level; - - count = (uint8_t)(hwmgr->dyn_state.vddc_dependency_on_sclk->count); - - for (level = 0; level < count; level++) { - if (hwmgr->dyn_state.vddc_dependency_on_sclk->entries[level].clk - >= data->vbios_boot_state.sclk_bootup_value) { - smu_data->smc_state_table.GraphicsBootLevel = level; - break; - } - } - - count = (uint8_t)(hwmgr->dyn_state.vddc_dependency_on_mclk->count); - - for (level = 0; level < count; level++) { - if (hwmgr->dyn_state.vddc_dependency_on_mclk->entries[level].clk - >= data->vbios_boot_state.mclk_bootup_value) { - smu_data->smc_state_table.MemoryBootLevel = level; - break; - } - } - - return 0; -} - -static int iceland_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; - SMU71_Discrete_DpmTable *dpm_table = &(smu_data->smc_state_table); - struct phm_cac_tdp_table *cac_dtp_table = hwmgr->dyn_state.cac_dtp_table; - struct phm_ppm_table *ppm = hwmgr->dyn_state.ppm_parameter_table; - const uint16_t *def1, *def2; - int i, j, k; - - - /* - * TDP number of fraction bits are changed from 8 to 7 for Iceland - * as requested by SMC team - */ - - dpm_table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 256)); - dpm_table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usConfigurableTDP * 256)); - - - dpm_table->DTETjOffset = 0; - - dpm_table->GpuTjMax = (uint8_t)(data->thermal_temp_setting.temperature_high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES); - dpm_table->GpuTjHyst = 8; - - dpm_table->DTEAmbientTempBase = defaults->dte_ambient_temp_base; - - /* The following are for new Iceland Multi-input fan/thermal control */ - if (NULL != ppm) { - dpm_table->PPM_PkgPwrLimit = (uint16_t)ppm->dgpu_tdp * 256 / 1000; - dpm_table->PPM_TemperatureLimit = (uint16_t)ppm->tj_max * 256; - } else { - dpm_table->PPM_PkgPwrLimit = 0; - dpm_table->PPM_TemperatureLimit = 0; - } - - CONVERT_FROM_HOST_TO_SMC_US(dpm_table->PPM_PkgPwrLimit); - CONVERT_FROM_HOST_TO_SMC_US(dpm_table->PPM_TemperatureLimit); - - dpm_table->BAPM_TEMP_GRADIENT = PP_HOST_TO_SMC_UL(defaults->bapm_temp_gradient); - def1 = defaults->bapmti_r; - def2 = defaults->bapmti_rc; - - for (i = 0; i < SMU71_DTE_ITERATIONS; i++) { - for (j = 0; j < SMU71_DTE_SOURCES; j++) { - for (k = 0; k < SMU71_DTE_SINKS; k++) { - dpm_table->BAPMTI_R[i][j][k] = 
PP_HOST_TO_SMC_US(*def1); - dpm_table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*def2); - def1++; - def2++; - } - } - } - - return 0; -} - -static int iceland_populate_smc_svi2_config(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *tab) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) - tab->SVI2Enable |= VDDC_ON_SVI2; - - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) - tab->SVI2Enable |= VDDCI_ON_SVI2; - else - tab->MergedVddci = 1; - - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) - tab->SVI2Enable |= MVDD_ON_SVI2; - - PP_ASSERT_WITH_CODE(tab->SVI2Enable != (VDDC_ON_SVI2 | VDDCI_ON_SVI2 | MVDD_ON_SVI2) && - (tab->SVI2Enable & VDDC_ON_SVI2), "SVI2 domain configuration is incorrect!", return -EINVAL); - - return 0; -} - -/** - * Initializes the SMC table and uploads it - * - * @param hwmgr the address of the powerplay hardware manager. - * @param pInput the pointer to input data (PowerState) - * @return always 0 - */ -int iceland_init_smc_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - SMU71_Discrete_DpmTable *table = &(smu_data->smc_state_table); - - - iceland_initialize_power_tune_defaults(hwmgr); - memset(&(smu_data->smc_state_table), 0x00, sizeof(smu_data->smc_state_table)); - - if (SMU7_VOLTAGE_CONTROL_NONE != data->voltage_control) { - iceland_populate_smc_voltage_tables(hwmgr, table); - } - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; - - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StepVddc)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - - if (data->is_memory_gddr5) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; - - - if (data->ulv_supported) { - result = iceland_populate_ulv_state(hwmgr, &(smu_data->ulv_setting)); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ULV state!", return result;); - - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixCG_ULV_PARAMETER, 0x40035); - } - - result = iceland_populate_smc_link_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Link Level!", return result;); - - result = iceland_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Graphics Level!", return result;); - - result = iceland_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Memory Level!", return result;); - - result = iceland_populate_smc_acpi_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ACPI Level!", return result;); - - result = iceland_populate_smc_vce_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize VCE Level!", return result;); - - result = iceland_populate_smc_acp_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ACP Level!", return result;); - - result = iceland_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize SAMU Level!", return result;); - - /* Since only the initial state is completely set up at this point (the other states are just copies of the boot state) we only */ - /* need to populate the ARB settings for the initial state. 
*/ - result = iceland_program_memory_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to Write ARB settings for the initial state.", return result;); - - result = iceland_populate_smc_uvd_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize UVD Level!", return result;); - - table->GraphicsBootLevel = 0; - table->MemoryBootLevel = 0; - - result = iceland_populate_smc_boot_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Boot Level!", return result;); - - result = iceland_populate_smc_initial_state(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, "Failed to initialize Boot State!", return result); - - result = iceland_populate_bapm_parameters_in_dpm_table(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, "Failed to populate BAPM Parameters!", return result); - - table->GraphicsVoltageChangeEnable = 1; - table->GraphicsThermThrottleEnable = 1; - table->GraphicsInterval = 1; - table->VoltageInterval = 1; - table->ThermalInterval = 1; - - table->TemperatureLimitHigh = - (data->thermal_temp_setting.temperature_high * - SMU7_Q88_FORMAT_CONVERSION_UNIT) / PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - table->TemperatureLimitLow = - (data->thermal_temp_setting.temperature_low * - SMU7_Q88_FORMAT_CONVERSION_UNIT) / PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - - table->MemoryVoltageChangeEnable = 1; - table->MemoryInterval = 1; - table->VoltageResponseTime = 0; - table->PhaseResponseTime = 0; - table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; - table->PCIeGenInterval = 1; - - result = iceland_populate_smc_svi2_config(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate SVI2 setting!", return result); - - table->ThermGpio = 17; - table->SclkStepSize = 0x4000; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcVid); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcPhase); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddciVid); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskMvddVid); - CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); - CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); - CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); - - table->BootVddc = PP_HOST_TO_SMC_US(table->BootVddc * VOLTAGE_SCALE); - table->BootVddci = PP_HOST_TO_SMC_US(table->BootVddci * VOLTAGE_SCALE); - table->BootMVdd = PP_HOST_TO_SMC_US(table->BootMVdd * VOLTAGE_SCALE); - - /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ - result = smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.dpm_table_start + - offsetof(SMU71_Discrete_DpmTable, SystemFlags), - (uint8_t *)&(table->SystemFlags), - sizeof(SMU71_Discrete_DpmTable)-3 * sizeof(SMU71_PIDController), - SMC_RAM_END); - - PP_ASSERT_WITH_CODE(0 == result, - "Failed to upload dpm data to SMC memory!", return result;); - - /* Upload all ulv setting to SMC memory.(dpm level, dpm level count etc) */ - result = smu7_copy_bytes_to_smc(hwmgr, - smu_data->smu7_data.ulv_setting_starts, - (uint8_t *)&(smu_data->ulv_setting), - sizeof(SMU71_Discrete_Ulv), - SMC_RAM_END); - - - result = iceland_populate_initial_mc_reg_table(hwmgr); - PP_ASSERT_WITH_CODE((0 == result), - "Failed to populate initialize MC Reg table!", return result); - - result = iceland_populate_pm_fuses(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate PM fuses to SMC memory!", return result); - - return 0; -} - 
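As a reading aid for the upload at the end of iceland_init_smc_table() above (and not part of the removed driver code): multi-byte fields are first swapped to the SMC's byte order, then the structure is copied starting at offsetof(SMU71_Discrete_DpmTable, SystemFlags) with the three trailing SMU71_PIDController blocks excluded from the length. The standalone C sketch below mimics that pattern; the struct layout and copy_to_smc() stub are hypothetical stand-ins for the real SMU71 types and smu7_copy_bytes_to_smc():

/*
 * Sketch of the "partial struct upload" pattern: byte-swap fields for the
 * big-endian SMC, then copy from a member offset, excluding trailing blocks.
 * All names below are illustrative stand-ins, not the driver's own types.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct pid_controller { uint32_t ki, lf_window, lf_alpha; };   /* stand-in */

struct dpm_table_sketch {
	uint32_t SystemFlags;
	uint32_t SclkStepSize;
	uint16_t TemperatureLimitHigh;
	uint16_t TemperatureLimitLow;
	struct pid_controller pid[3];                          /* not uploaded */
};

static uint32_t host_to_smc_ul(uint32_t v)                     /* LE -> BE */
{
	return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
	       ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
}

/* stand-in for smu7_copy_bytes_to_smc(): just reports the request */
static int copy_to_smc(uint32_t smc_addr, const uint8_t *src, uint32_t len)
{
	printf("upload %u bytes to SMC address 0x%08x\n",
	       (unsigned)len, (unsigned)smc_addr);
	(void)src;
	return 0;
}

int main(void)
{
	struct dpm_table_sketch table = { .SystemFlags = 0x1, .SclkStepSize = 0x4000 };
	uint32_t dpm_table_start = 0x20000;                    /* made-up base */

	table.SystemFlags  = host_to_smc_ul(table.SystemFlags);
	table.SclkStepSize = host_to_smc_ul(table.SclkStepSize);

	/* copy everything from SystemFlags up to, but not including, pid[] */
	return copy_to_smc(dpm_table_start + offsetof(struct dpm_table_sketch, SystemFlags),
			   (const uint8_t *)&table.SystemFlags,
			   sizeof(table) - 3 * sizeof(struct pid_controller));
}

The subtraction of the PID-controller size mirrors the "sizeof(SMU71_Discrete_DpmTable) - 3 * sizeof(SMU71_PIDController)" length used above, which keeps the SMC-owned PID blocks out of the driver upload.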
-/** -* Set up the fan table to control the fan using the SMC. -* @param hwmgr the address of the powerplay hardware manager. -* @param pInput the pointer to input data -* @param pOutput the pointer to output data -* @param pStorage the pointer to temporary storage -* @param Result the last failure code -* @return result from set temperature range routine -*/ -int iceland_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) -{ - struct smu7_smumgr *smu7_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - SMU71_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; - uint32_t duty100; - uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; - uint16_t fdo_min, slope1, slope2; - uint32_t reference_clock; - int res; - uint64_t tmp64; - - if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) - return 0; - - if (hwmgr->thermal_controller.fanInfo.bNoFan) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - if (0 == smu7_data->fan_table_start) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL1, FMAX_DUTY100); - - if (0 == duty100) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin * duty100; - do_div(tmp64, 10000); - fdo_min = (uint16_t)tmp64; - - t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - hwmgr->thermal_controller.advanceFanControlParameters.usTMin; - t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - hwmgr->thermal_controller.advanceFanControlParameters.usTMed; - - pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; - pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; - - slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); - slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); - - fan_table.TempMin = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMin) / 100); - fan_table.TempMed = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMed) / 100); - fan_table.TempMax = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMax) / 100); - - fan_table.Slope1 = cpu_to_be16(slope1); - fan_table.Slope2 = cpu_to_be16(slope2); - - fan_table.FdoMin = cpu_to_be16(fdo_min); - - fan_table.HystDown = cpu_to_be16(hwmgr->thermal_controller.advanceFanControlParameters.ucTHyst); - - fan_table.HystUp = cpu_to_be16(1); - - fan_table.HystSlope = cpu_to_be16(1); - - fan_table.TempRespLim = cpu_to_be16(5); - - reference_clock = smu7_get_xclk(hwmgr); - - fan_table.RefreshPeriod = cpu_to_be32((hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay * reference_clock) / 1600); - - fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); - - fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_MULT_THERMAL_CTRL, TEMP_SEL); - - /* fan_table.FanControl_GL_Flag = 1; */ - - res = smu7_copy_bytes_to_smc(hwmgr, smu7_data->fan_table_start, (uint8_t *)&fan_table, (uint32_t)sizeof(fan_table), SMC_RAM_END); - - return 0; -} - - -static int 
iceland_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (data->need_update_smu7_dpm_table & - (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) - return iceland_program_memory_timing_parameters(hwmgr); - - return 0; -} - -int iceland_update_sclk_threshold(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - - int result = 0; - uint32_t low_sclk_interrupt_threshold = 0; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; - low_sclk_interrupt_threshold = - data->low_sclk_interrupt_threshold; - - CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); - - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.dpm_table_start + - offsetof(SMU71_Discrete_DpmTable, - LowSclkInterruptThreshold), - (uint8_t *)&low_sclk_interrupt_threshold, - sizeof(uint32_t), - SMC_RAM_END); - } - - result = iceland_update_and_upload_mc_reg_table(hwmgr); - - PP_ASSERT_WITH_CODE((0 == result), "Failed to upload MC reg table!", return result); - - result = iceland_program_mem_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE((result == 0), - "Failed to program memory timing parameters!", - ); - - return result; -} - -uint32_t iceland_get_offsetof(uint32_t type, uint32_t member) -{ - switch (type) { - case SMU_SoftRegisters: - switch (member) { - case HandshakeDisables: - return offsetof(SMU71_SoftRegisters, HandshakeDisables); - case VoltageChangeTimeout: - return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout); - case AverageGraphicsActivity: - return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity); - case PreVBlankGap: - return offsetof(SMU71_SoftRegisters, PreVBlankGap); - case VBlankTimeout: - return offsetof(SMU71_SoftRegisters, VBlankTimeout); - case UcodeLoadStatus: - return offsetof(SMU71_SoftRegisters, UcodeLoadStatus); - } - case SMU_Discrete_DpmTable: - switch (member) { - case LowSclkInterruptThreshold: - return offsetof(SMU71_Discrete_DpmTable, LowSclkInterruptThreshold); - } - } - pr_warn("can't get the offset of type %x member %x\n", type, member); - return 0; -} - -uint32_t iceland_get_mac_definition(uint32_t value) -{ - switch (value) { - case SMU_MAX_LEVELS_GRAPHICS: - return SMU71_MAX_LEVELS_GRAPHICS; - case SMU_MAX_LEVELS_MEMORY: - return SMU71_MAX_LEVELS_MEMORY; - case SMU_MAX_LEVELS_LINK: - return SMU71_MAX_LEVELS_LINK; - case SMU_MAX_ENTRIES_SMIO: - return SMU71_MAX_ENTRIES_SMIO; - case SMU_MAX_LEVELS_VDDC: - return SMU71_MAX_LEVELS_VDDC; - case SMU_MAX_LEVELS_VDDCI: - return SMU71_MAX_LEVELS_VDDCI; - case SMU_MAX_LEVELS_MVDD: - return SMU71_MAX_LEVELS_MVDD; - } - - pr_warn("can't get the mac of %x\n", value); - return 0; -} - -/** - * Get the location of various tables inside the FW image. - * - * @param hwmgr the address of the powerplay hardware manager. 
- * @return always 0 - */ -int iceland_process_firmware_header(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct smu7_smumgr *smu7_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - - uint32_t tmp; - int result; - bool error = false; - - result = smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, DpmTable), - &tmp, SMC_RAM_END); - - if (0 == result) { - smu7_data->dpm_table_start = tmp; - } - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, SoftRegisters), - &tmp, SMC_RAM_END); - - if (0 == result) { - data->soft_regs_start = tmp; - smu7_data->soft_regs_start = tmp; - } - - error |= (0 != result); - - - result = smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, mcRegisterTable), - &tmp, SMC_RAM_END); - - if (0 == result) { - smu7_data->mc_reg_table_start = tmp; - } - - result = smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, FanTable), - &tmp, SMC_RAM_END); - - if (0 == result) { - smu7_data->fan_table_start = tmp; - } - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, mcArbDramTimingTable), - &tmp, SMC_RAM_END); - - if (0 == result) { - smu7_data->arb_table_start = tmp; - } - - error |= (0 != result); - - - result = smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, Version), - &tmp, SMC_RAM_END); - - if (0 == result) { - hwmgr->microcode_version_info.SMC = tmp; - } - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU71_FIRMWARE_HEADER_LOCATION + - offsetof(SMU71_Firmware_Header, UlvSettings), - &tmp, SMC_RAM_END); - - if (0 == result) { - smu7_data->ulv_setting_starts = tmp; - } - - error |= (0 != result); - - return error ? 
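iceland_process_firmware_header() repeats the same read-then-store pattern for every table offset in the firmware header, OR-ing each read's failure into a single error flag (the mcRegisterTable read above is the one spot that does not feed the accumulator). A hedged, table-driven sketch of the same idea; read_dword(), struct header_field and process_header() are hypothetical stand-ins for smu7_read_smc_sram_dword() and the SMU71_Firmware_Header bookkeeping:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical reader with the same contract as smu7_read_smc_sram_dword():
 * returns 0 on success and fills *value. */
static int read_dword(uint32_t sram_addr, uint32_t *value)
{
        (void)sram_addr;
        *value = 0;      /* stand-in; the driver reads SMC SRAM here */
        return 0;
}

struct header_field {
        size_t offset;   /* offset of the field inside the header  */
        uint32_t *dest;  /* where the discovered table start goes  */
};

static int process_header(uint32_t header_base,
                          const struct header_field *fields, size_t n)
{
        bool error = false;
        size_t i;

        for (i = 0; i < n; i++) {
                uint32_t tmp;
                int result = read_dword(header_base + (uint32_t)fields[i].offset, &tmp);

                if (result == 0)
                        *fields[i].dest = tmp;
                error |= (result != 0);   /* every read feeds the flag */
        }

        return error ? 1 : 0;
}

int main(void)
{
        uint32_t dpm_table_start = 0, fan_table_start = 0;
        struct header_field fields[] = {
                { 0x00, &dpm_table_start },   /* offsets are made up */
                { 0x14, &fan_table_start },
        };

        return process_header(0x20000, fields, 2);
}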
1 : 0; -} - -/*---------------------------MC----------------------------*/ - -static uint8_t iceland_get_memory_modile_index(struct pp_hwmgr *hwmgr) -{ - return (uint8_t) (0xFF & (cgs_read_register(hwmgr->device, mmBIOS_SCRATCH_4) >> 16)); -} - -static bool iceland_check_s0_mc_reg_index(uint16_t in_reg, uint16_t *out_reg) -{ - bool result = true; - - switch (in_reg) { - case mmMC_SEQ_RAS_TIMING: - *out_reg = mmMC_SEQ_RAS_TIMING_LP; - break; - - case mmMC_SEQ_DLL_STBY: - *out_reg = mmMC_SEQ_DLL_STBY_LP; - break; - - case mmMC_SEQ_G5PDX_CMD0: - *out_reg = mmMC_SEQ_G5PDX_CMD0_LP; - break; - - case mmMC_SEQ_G5PDX_CMD1: - *out_reg = mmMC_SEQ_G5PDX_CMD1_LP; - break; - - case mmMC_SEQ_G5PDX_CTRL: - *out_reg = mmMC_SEQ_G5PDX_CTRL_LP; - break; - - case mmMC_SEQ_CAS_TIMING: - *out_reg = mmMC_SEQ_CAS_TIMING_LP; - break; - - case mmMC_SEQ_MISC_TIMING: - *out_reg = mmMC_SEQ_MISC_TIMING_LP; - break; - - case mmMC_SEQ_MISC_TIMING2: - *out_reg = mmMC_SEQ_MISC_TIMING2_LP; - break; - - case mmMC_SEQ_PMG_DVS_CMD: - *out_reg = mmMC_SEQ_PMG_DVS_CMD_LP; - break; - - case mmMC_SEQ_PMG_DVS_CTL: - *out_reg = mmMC_SEQ_PMG_DVS_CTL_LP; - break; - - case mmMC_SEQ_RD_CTL_D0: - *out_reg = mmMC_SEQ_RD_CTL_D0_LP; - break; - - case mmMC_SEQ_RD_CTL_D1: - *out_reg = mmMC_SEQ_RD_CTL_D1_LP; - break; - - case mmMC_SEQ_WR_CTL_D0: - *out_reg = mmMC_SEQ_WR_CTL_D0_LP; - break; - - case mmMC_SEQ_WR_CTL_D1: - *out_reg = mmMC_SEQ_WR_CTL_D1_LP; - break; - - case mmMC_PMG_CMD_EMRS: - *out_reg = mmMC_SEQ_PMG_CMD_EMRS_LP; - break; - - case mmMC_PMG_CMD_MRS: - *out_reg = mmMC_SEQ_PMG_CMD_MRS_LP; - break; - - case mmMC_PMG_CMD_MRS1: - *out_reg = mmMC_SEQ_PMG_CMD_MRS1_LP; - break; - - case mmMC_SEQ_PMG_TIMING: - *out_reg = mmMC_SEQ_PMG_TIMING_LP; - break; - - case mmMC_PMG_CMD_MRS2: - *out_reg = mmMC_SEQ_PMG_CMD_MRS2_LP; - break; - - case mmMC_SEQ_WR_CTL_2: - *out_reg = mmMC_SEQ_WR_CTL_2_LP; - break; - - default: - result = false; - break; - } - - return result; -} - -static int iceland_set_s0_mc_reg_index(struct iceland_mc_reg_table *table) -{ - uint32_t i; - uint16_t address; - - for (i = 0; i < table->last; i++) { - table->mc_reg_address[i].s0 = - iceland_check_s0_mc_reg_index(table->mc_reg_address[i].s1, &address) - ? address : table->mc_reg_address[i].s1; - } - return 0; -} - -static int iceland_copy_vbios_smc_reg_table(const pp_atomctrl_mc_reg_table *table, - struct iceland_mc_reg_table *ni_table) -{ - uint8_t i, j; - - PP_ASSERT_WITH_CODE((table->last <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - PP_ASSERT_WITH_CODE((table->num_entries <= MAX_AC_TIMING_ENTRIES), - "Invalid VramInfo table.", return -EINVAL); - - for (i = 0; i < table->last; i++) { - ni_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; - } - ni_table->last = table->last; - - for (i = 0; i < table->num_entries; i++) { - ni_table->mc_reg_table_entry[i].mclk_max = - table->mc_reg_table_entry[i].mclk_max; - for (j = 0; j < table->last; j++) { - ni_table->mc_reg_table_entry[i].mc_data[j] = - table->mc_reg_table_entry[i].mc_data[j]; - } - } - - ni_table->num_entries = table->num_entries; - - return 0; -} - -/** - * VBIOS omits some information to reduce size, we need to recover them here. - * 1. when we see mmMC_SEQ_MISC1, bit[31:16] EMRS1, need to be write to mmMC_PMG_CMD_EMRS /_LP[15:0]. - * Bit[15:0] MRS, need to be update mmMC_PMG_CMD_MRS/_LP[15:0] - * 2. when we see mmMC_SEQ_RESERVE_M, bit[15:0] EMRS2, need to be write to mmMC_PMG_CMD_MRS1/_LP[15:0]. - * 3. 
need to set these data for each clock range - * - * @param hwmgr the address of the powerplay hardware manager. - * @param table the address of MCRegTable - * @return always 0 - */ -static int iceland_set_mc_special_registers(struct pp_hwmgr *hwmgr, - struct iceland_mc_reg_table *table) -{ - uint8_t i, j, k; - uint32_t temp_reg; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - for (i = 0, j = table->last; i < table->last; i++) { - PP_ASSERT_WITH_CODE((j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - - switch (table->mc_reg_address[i].s1) { - - case mmMC_SEQ_MISC1: - temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_EMRS; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_EMRS_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - ((temp_reg & 0xffff0000)) | - ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); - } - j++; - PP_ASSERT_WITH_CODE((j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - - temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (temp_reg & 0xffff0000) | - (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - - if (!data->is_memory_gddr5) { - table->mc_reg_table_entry[k].mc_data[j] |= 0x100; - } - } - j++; - PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - - if (!data->is_memory_gddr5 && j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE) { - table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; - table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; - } - j++; - PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - } - - break; - - case mmMC_SEQ_RESERVE_M: - temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS1; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS1_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (temp_reg & 0xffff0000) | - (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - } - j++; - PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - break; - - default: - break; - } - - } - - table->last = j; - - return 0; -} - -static int iceland_set_valid_flag(struct iceland_mc_reg_table *table) -{ - uint8_t i, j; - for (i = 0; i < table->last; i++) { - for (j = 1; j < table->num_entries; j++) { - if (table->mc_reg_table_entry[j-1].mc_data[i] != - table->mc_reg_table_entry[j].mc_data[i]) { - table->validflag |= (1<<i); - break; - } - } - } - - return 0; -} - -int iceland_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); - pp_atomctrl_mc_reg_table *table; - struct iceland_mc_reg_table *ni_table = &smu_data->mc_reg_table; - uint8_t module_index = iceland_get_memory_modile_index(hwmgr); - - table = kzalloc(sizeof(pp_atomctrl_mc_reg_table), GFP_KERNEL); - - if (NULL == table) - return -ENOMEM; - - /* Program additional LP 
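iceland_set_mc_special_registers() above synthesizes extra register entries out of each MC_SEQ_MISC1 value: bits [31:16] (EMRS) become the low half of the new MC_PMG_CMD_EMRS entry and bits [15:0] (MRS) the low half of the MC_PMG_CMD_MRS entry, while the high halves are taken from the registers as currently programmed. A small worked example with made-up register values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint32_t misc1     = 0xABCD1234;  /* hypothetical MC_SEQ_MISC1 entry */
        uint32_t live_emrs = 0x00110000;  /* hypothetical MC_PMG_CMD_EMRS    */
        uint32_t live_mrs  = 0x00220000;  /* hypothetical MC_PMG_CMD_MRS     */

        /* Same composition as the removed iceland_set_mc_special_registers(). */
        uint32_t emrs_entry = (live_emrs & 0xffff0000) | ((misc1 & 0xffff0000) >> 16);
        uint32_t mrs_entry  = (live_mrs  & 0xffff0000) | (misc1 & 0x0000ffff);

        assert(emrs_entry == 0x0011ABCD);
        assert(mrs_entry  == 0x00221234);
        return 0;
}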
registers that are no longer programmed by VBIOS */ - cgs_write_register(hwmgr->device, mmMC_SEQ_RAS_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RAS_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_CAS_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_CAS_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_DLL_STBY_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_DLL_STBY)); - cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0)); - cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL)); - cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_EMRS_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS1_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0)); - cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0)); - cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS2_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS2)); - cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2)); - - memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table)); - - result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table); - - if (0 == result) - result = iceland_copy_vbios_smc_reg_table(table, ni_table); - - if (0 == result) { - iceland_set_s0_mc_reg_index(ni_table); - result = iceland_set_mc_special_registers(hwmgr, ni_table); - } - - if (0 == result) - iceland_set_valid_flag(ni_table); - - kfree(table); - - return result; -} - -bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr) -{ - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? 
true : false; -} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index a778e174ba01..34128822b8fb 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -30,15 +30,84 @@ #include "smumgr.h" #include "iceland_smumgr.h" -#include "smu_ucode_xfer_vi.h" + #include "ppsmc.h" + +#include "cgs_common.h" + +#include "smu7_dyn_defaults.h" +#include "smu7_hwmgr.h" +#include "hardwaremanager.h" +#include "ppatomctrl.h" +#include "atombios.h" +#include "pppcielanes.h" +#include "pp_endian.h" +#include "processpptables.h" + + #include "smu/smu_7_1_1_d.h" #include "smu/smu_7_1_1_sh_mask.h" -#include "cgs_common.h" -#include "iceland_smc.h" +#include "smu71_discrete.h" + +#include "smu_ucode_xfer_vi.h" +#include "gmc/gmc_8_1_d.h" +#include "gmc/gmc_8_1_sh_mask.h" +#include "bif/bif_5_0_d.h" +#include "bif/bif_5_0_sh_mask.h" +#include "dce/dce_10_0_d.h" +#include "dce/dce_10_0_sh_mask.h" + #define ICELAND_SMC_SIZE 0x20000 +#define VOLTAGE_SCALE 4 +#define POWERTUNE_DEFAULT_SET_MAX 1 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 +#define MC_CG_ARB_FREQ_F1 0x0b +#define VDDC_VDDCI_DELTA 200 + +#define DEVICE_ID_VI_ICELAND_M_6900 0x6900 +#define DEVICE_ID_VI_ICELAND_M_6901 0x6901 +#define DEVICE_ID_VI_ICELAND_M_6902 0x6902 +#define DEVICE_ID_VI_ICELAND_M_6903 0x6903 + +static const struct iceland_pt_defaults defaults_iceland = { + /* + * sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, + * TDC_MAWt, TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT + */ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, + { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61 }, + { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } +}; + +/* 35W - XT, XTL */ +static const struct iceland_pt_defaults defaults_icelandxt = { + /* + * sviLoadLIneEn, SviLoadLineVddC, + * TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, + * BAPM_TEMP_GRADIENT + */ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x0, + { 0xA7, 0x0, 0x0, 0xB5, 0x0, 0x0, 0x9F, 0x0, 0x0, 0xD6, 0x0, 0x0, 0xD7, 0x0, 0x0}, + { 0x1EA, 0x0, 0x0, 0x224, 0x0, 0x0, 0x25E, 0x0, 0x0, 0x28E, 0x0, 0x0, 0x2AB, 0x0, 0x0} +}; + +/* 25W - PRO, LE */ +static const struct iceland_pt_defaults defaults_icelandpro = { + /* + * sviLoadLIneEn, SviLoadLineVddC, + * TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, + * BAPM_TEMP_GRADIENT + */ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x0, + { 0xB7, 0x0, 0x0, 0xC3, 0x0, 0x0, 0xB5, 0x0, 0x0, 0xEA, 0x0, 0x0, 0xE6, 0x0, 0x0}, + { 0x1EA, 0x0, 0x0, 0x224, 0x0, 0x0, 0x25E, 0x0, 0x0, 0x28E, 0x0, 0x0, 0x2AB, 0x0, 0x0} +}; + static int iceland_start_smc(struct pp_hwmgr *hwmgr) { PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, @@ -191,13 +260,6 @@ static int iceland_start_smu(struct pp_hwmgr *hwmgr) return result; } -/** - * Write a 32bit value to the SMC SRAM space. - * ALL PARAMETERS ARE IN HOST BYTE ORDER. - * @param smumgr the address of the powerplay hardware manager. - * @param smcAddress the address in the SMC RAM to access. - * @param value to write to the SMC SRAM. 
- */ static int iceland_smu_init(struct pp_hwmgr *hwmgr) { int i; @@ -219,6 +281,2413 @@ static int iceland_smu_init(struct pp_hwmgr *hwmgr) return 0; } + +static void iceland_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) +{ + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + struct cgs_system_info sys_info = {0}; + uint32_t dev_id; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_DEV; + cgs_query_system_info(hwmgr->device, &sys_info); + dev_id = (uint32_t)sys_info.value; + + switch (dev_id) { + case DEVICE_ID_VI_ICELAND_M_6900: + case DEVICE_ID_VI_ICELAND_M_6903: + smu_data->power_tune_defaults = &defaults_icelandxt; + break; + + case DEVICE_ID_VI_ICELAND_M_6901: + case DEVICE_ID_VI_ICELAND_M_6902: + smu_data->power_tune_defaults = &defaults_icelandpro; + break; + default: + smu_data->power_tune_defaults = &defaults_iceland; + pr_warn("Unknown V.I. Device ID.\n"); + break; + } + return; +} + +static int iceland_populate_svi_load_line(struct pp_hwmgr *hwmgr) +{ + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; + + smu_data->power_tune_table.SviLoadLineEn = defaults->svi_load_line_en; + smu_data->power_tune_table.SviLoadLineVddC = defaults->svi_load_line_vddc; + smu_data->power_tune_table.SviLoadLineTrimVddC = 3; + smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int iceland_populate_tdc_limit(struct pp_hwmgr *hwmgr) +{ + uint16_t tdc_limit; + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; + + tdc_limit = (uint16_t)(hwmgr->dyn_state.cac_dtp_table->usTDC * 256); + smu_data->power_tune_table.TDC_VDDC_PkgLimit = + CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); + smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + defaults->tdc_vddc_throttle_release_limit_perc; + smu_data->power_tune_table.TDC_MAWt = defaults->tdc_mawt; + + return 0; +} + +static int iceland_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) +{ + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; + uint32_t temp; + + if (smu7_read_smc_sram_dword(hwmgr, + fuse_table_offset + + offsetof(SMU71_Discrete_PmFuses, TdcWaterfallCtl), + (uint32_t *)&temp, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", + return -EINVAL); + else + smu_data->power_tune_table.TdcWaterfallCtl = defaults->tdc_waterfall_ctl; + + return 0; +} + +static int iceland_populate_temperature_scaler(struct pp_hwmgr *hwmgr) +{ + return 0; +} + +static int iceland_populate_gnb_lpml(struct pp_hwmgr *hwmgr) +{ + int i; + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. 
*/ + for (i = 0; i < 8; i++) + smu_data->power_tune_table.GnbLPML[i] = 0; + + return 0; +} + +static int iceland_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) +{ + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + uint16_t HiSidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; + uint16_t LoSidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; + struct phm_cac_tdp_table *cac_table = hwmgr->dyn_state.cac_dtp_table; + + HiSidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); + LoSidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); + + smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = + CONVERT_FROM_HOST_TO_SMC_US(HiSidd); + smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = + CONVERT_FROM_HOST_TO_SMC_US(LoSidd); + + return 0; +} + +static int iceland_populate_bapm_vddc_vid_sidd(struct pp_hwmgr *hwmgr) +{ + int i; + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + uint8_t *hi_vid = smu_data->power_tune_table.BapmVddCVidHiSidd; + uint8_t *lo_vid = smu_data->power_tune_table.BapmVddCVidLoSidd; + + PP_ASSERT_WITH_CODE(NULL != hwmgr->dyn_state.cac_leakage_table, + "The CAC Leakage table does not exist!", return -EINVAL); + PP_ASSERT_WITH_CODE(hwmgr->dyn_state.cac_leakage_table->count <= 8, + "There should never be more than 8 entries for BapmVddcVid!!!", return -EINVAL); + PP_ASSERT_WITH_CODE(hwmgr->dyn_state.cac_leakage_table->count == hwmgr->dyn_state.vddc_dependency_on_sclk->count, + "CACLeakageTable->count and VddcDependencyOnSCLk->count not equal", return -EINVAL); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EVV)) { + for (i = 0; (uint32_t) i < hwmgr->dyn_state.cac_leakage_table->count; i++) { + lo_vid[i] = convert_to_vid(hwmgr->dyn_state.cac_leakage_table->entries[i].Vddc1); + hi_vid[i] = convert_to_vid(hwmgr->dyn_state.cac_leakage_table->entries[i].Vddc2); + } + } else { + PP_ASSERT_WITH_CODE(false, "Iceland should always support EVV", return -EINVAL); + } + + return 0; +} + +static int iceland_populate_vddc_vid(struct pp_hwmgr *hwmgr) +{ + int i; + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + uint8_t *vid = smu_data->power_tune_table.VddCVid; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + PP_ASSERT_WITH_CODE(data->vddc_voltage_table.count <= 8, + "There should never be more than 8 entries for VddcVid!!!", + return -EINVAL); + + for (i = 0; i < (int)data->vddc_voltage_table.count; i++) { + vid[i] = convert_to_vid(data->vddc_voltage_table.entries[i].value); + } + + return 0; +} + + + +static int iceland_populate_pm_fuses(struct pp_hwmgr *hwmgr) +{ + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + uint32_t pm_fuse_table_offset; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + if (smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to get pm_fuse_table_offset Failed!", + return -EINVAL); + + /* DW0 - DW3 */ + if (iceland_populate_bapm_vddc_vid_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate bapm vddc vid Failed!", + return -EINVAL); + + /* DW4 - DW5 */ + if (iceland_populate_vddc_vid(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate vddc vid Failed!", + return -EINVAL); + + /* DW6 */ + if 
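The TDC and base-leakage fields populated above are scaled by 256 before upload (which looks like an x.8 fixed-point layout in the 16-bit SMC fields), and the CAC leakage values are first divided by 100 because the pptable stores them in hundredths; note that the integer division happens before the multiply, so the fractional part of the leakage is dropped. A quick check with hypothetical pptable numbers:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint16_t us_tdc = 120;             /* hypothetical pptable TDC value */
        uint16_t us_hi_cac_leakage = 150;  /* hypothetical, i.e. 1.50        */

        uint16_t tdc_limit = (uint16_t)(us_tdc * 256);
        uint16_t hi_sidd   = (uint16_t)(us_hi_cac_leakage / 100 * 256);

        assert(tdc_limit == 30720);
        assert(hi_sidd == 256);   /* integer division: 150/100 = 1, then * 256 */
        return 0;
}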
(iceland_populate_svi_load_line(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate SviLoadLine Failed!", + return -EINVAL); + /* DW7 */ + if (iceland_populate_tdc_limit(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TDCLimit Failed!", return -EINVAL); + /* DW8 */ + if (iceland_populate_dw8(hwmgr, pm_fuse_table_offset)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TdcWaterfallCtl, " + "LPMLTemperature Min and Max Failed!", + return -EINVAL); + + /* DW9-DW12 */ + if (0 != iceland_populate_temperature_scaler(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate LPMLTemperatureScaler Failed!", + return -EINVAL); + + /* DW13-DW16 */ + if (iceland_populate_gnb_lpml(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Failed!", + return -EINVAL); + + /* DW18 */ + if (iceland_populate_bapm_vddc_base_leakage_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate BapmVddCBaseLeakage Hi and Lo Sidd Failed!", + return -EINVAL); + + if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, + (uint8_t *)&smu_data->power_tune_table, + sizeof(struct SMU71_Discrete_PmFuses), SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to download PmFuseTable Failed!", + return -EINVAL); + } + return 0; +} + +static int iceland_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, + struct phm_clock_voltage_dependency_table *allowed_clock_voltage_table, + uint32_t clock, uint32_t *vol) +{ + uint32_t i = 0; + + /* clock - voltage dependency table is empty table */ + if (allowed_clock_voltage_table->count == 0) + return -EINVAL; + + for (i = 0; i < allowed_clock_voltage_table->count; i++) { + /* find first sclk bigger than request */ + if (allowed_clock_voltage_table->entries[i].clk >= clock) { + *vol = allowed_clock_voltage_table->entries[i].v; + return 0; + } + } + + /* sclk is bigger than max sclk in the dependence table */ + *vol = allowed_clock_voltage_table->entries[i - 1].v; + + return 0; +} + +static int iceland_get_std_voltage_value_sidd(struct pp_hwmgr *hwmgr, + pp_atomctrl_voltage_table_entry *tab, uint16_t *hi, + uint16_t *lo) +{ + uint16_t v_index; + bool vol_found = false; + *hi = tab->value * VOLTAGE_SCALE; + *lo = tab->value * VOLTAGE_SCALE; + + /* SCLK/VDDC Dependency Table has to exist. */ + PP_ASSERT_WITH_CODE(NULL != hwmgr->dyn_state.vddc_dependency_on_sclk, + "The SCLK/VDDC Dependency Table does not exist.\n", + return -EINVAL); + + if (NULL == hwmgr->dyn_state.cac_leakage_table) { + pr_warn("CAC Leakage Table does not exist, using vddc.\n"); + return 0; + } + + /* + * Since voltage in the sclk/vddc dependency table is not + * necessarily in ascending order because of ELB voltage + * patching, loop through entire list to find exact voltage. 
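iceland_get_dependency_volt_by_clk() above returns the voltage of the first dependency-table entry whose clock is at or above the requested clock, and falls back to the last (highest) entry when the request exceeds the table. A tiny standalone model of that selection with made-up entries, clocks in the 10 kHz units used throughout and voltages in mV:

#include <assert.h>
#include <stdint.h>

struct dep_entry { uint32_t clk; uint32_t v; };

static uint32_t volt_for_clk(const struct dep_entry *tab, unsigned int count,
                             uint32_t clk)
{
        unsigned int i;

        /* Caller guarantees count > 0, as the driver does via its -EINVAL check. */
        for (i = 0; i < count; i++)
                if (tab[i].clk >= clk)
                        return tab[i].v;   /* first level that can carry clk */

        return tab[count - 1].v;           /* clk above table: use max level */
}

int main(void)
{
        const struct dep_entry tab[] = {
                { 30000, 800 }, { 60000, 900 }, { 100000, 1100 },  /* hypothetical */
        };

        assert(volt_for_clk(tab, 3, 50000) == 900);
        assert(volt_for_clk(tab, 3, 120000) == 1100);
        return 0;
}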
+ */ + for (v_index = 0; (uint32_t)v_index < hwmgr->dyn_state.vddc_dependency_on_sclk->count; v_index++) { + if (tab->value == hwmgr->dyn_state.vddc_dependency_on_sclk->entries[v_index].v) { + vol_found = true; + if ((uint32_t)v_index < hwmgr->dyn_state.cac_leakage_table->count) { + *lo = hwmgr->dyn_state.cac_leakage_table->entries[v_index].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[v_index].Leakage * VOLTAGE_SCALE); + } else { + pr_warn("Index from SCLK/VDDC Dependency Table exceeds the CAC Leakage Table index, using maximum index from CAC table.\n"); + *lo = hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Leakage * VOLTAGE_SCALE); + } + break; + } + } + + /* + * If voltage is not found in the first pass, loop again to + * find the best match, equal or higher value. + */ + if (!vol_found) { + for (v_index = 0; (uint32_t)v_index < hwmgr->dyn_state.vddc_dependency_on_sclk->count; v_index++) { + if (tab->value <= hwmgr->dyn_state.vddc_dependency_on_sclk->entries[v_index].v) { + vol_found = true; + if ((uint32_t)v_index < hwmgr->dyn_state.cac_leakage_table->count) { + *lo = hwmgr->dyn_state.cac_leakage_table->entries[v_index].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[v_index].Leakage) * VOLTAGE_SCALE; + } else { + pr_warn("Index from SCLK/VDDC Dependency Table exceeds the CAC Leakage Table index in second look up, using maximum index from CAC table."); + *lo = hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Leakage * VOLTAGE_SCALE); + } + break; + } + } + + if (!vol_found) + pr_warn("Unable to get std_vddc from SCLK/VDDC Dependency Table, using vddc.\n"); + } + + return 0; +} + +static int iceland_populate_smc_voltage_table(struct pp_hwmgr *hwmgr, + pp_atomctrl_voltage_table_entry *tab, + SMU71_Discrete_VoltageLevel *smc_voltage_tab) +{ + int result; + + result = iceland_get_std_voltage_value_sidd(hwmgr, tab, + &smc_voltage_tab->StdVoltageHiSidd, + &smc_voltage_tab->StdVoltageLoSidd); + if (0 != result) { + smc_voltage_tab->StdVoltageHiSidd = tab->value * VOLTAGE_SCALE; + smc_voltage_tab->StdVoltageLoSidd = tab->value * VOLTAGE_SCALE; + } + + smc_voltage_tab->Voltage = PP_HOST_TO_SMC_US(tab->value * VOLTAGE_SCALE); + CONVERT_FROM_HOST_TO_SMC_US(smc_voltage_tab->StdVoltageHiSidd); + CONVERT_FROM_HOST_TO_SMC_US(smc_voltage_tab->StdVoltageHiSidd); + + return 0; +} + +static int iceland_populate_smc_vddc_table(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + unsigned int count; + int result; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + table->VddcLevelCount = data->vddc_voltage_table.count; + for (count = 0; count < table->VddcLevelCount; count++) { + result = iceland_populate_smc_voltage_table(hwmgr, + &(data->vddc_voltage_table.entries[count]), + &(table->VddcLevel[count])); + PP_ASSERT_WITH_CODE(0 == result, "do not populate SMC VDDC voltage table", return -EINVAL); + + /* GPIO voltage control */ + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->voltage_control) + table->VddcLevel[count].Smio |= data->vddc_voltage_table.entries[count].smio_low; + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) + 
table->VddcLevel[count].Smio = 0; + } + + CONVERT_FROM_HOST_TO_SMC_UL(table->VddcLevelCount); + + return 0; +} + +static int iceland_populate_smc_vdd_ci_table(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t count; + int result; + + table->VddciLevelCount = data->vddci_voltage_table.count; + + for (count = 0; count < table->VddciLevelCount; count++) { + result = iceland_populate_smc_voltage_table(hwmgr, + &(data->vddci_voltage_table.entries[count]), + &(table->VddciLevel[count])); + PP_ASSERT_WITH_CODE(result == 0, "do not populate SMC VDDCI voltage table", return -EINVAL); + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + table->VddciLevel[count].Smio |= data->vddci_voltage_table.entries[count].smio_low; + else + table->VddciLevel[count].Smio |= 0; + } + + CONVERT_FROM_HOST_TO_SMC_UL(table->VddciLevelCount); + + return 0; +} + +static int iceland_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t count; + int result; + + table->MvddLevelCount = data->mvdd_voltage_table.count; + + for (count = 0; count < table->VddciLevelCount; count++) { + result = iceland_populate_smc_voltage_table(hwmgr, + &(data->mvdd_voltage_table.entries[count]), + &table->MvddLevel[count]); + PP_ASSERT_WITH_CODE(result == 0, "do not populate SMC mvdd voltage table", return -EINVAL); + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) + table->MvddLevel[count].Smio |= data->mvdd_voltage_table.entries[count].smio_low; + else + table->MvddLevel[count].Smio |= 0; + } + + CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); + + return 0; +} + + +static int iceland_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + int result; + + result = iceland_populate_smc_vddc_table(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "can not populate VDDC voltage table to SMC", return -EINVAL); + + result = iceland_populate_smc_vdd_ci_table(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "can not populate VDDCI voltage table to SMC", return -EINVAL); + + result = iceland_populate_smc_mvdd_table(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "can not populate MVDD voltage table to SMC", return -EINVAL); + + return 0; +} + +static int iceland_populate_ulv_level(struct pp_hwmgr *hwmgr, + struct SMU71_Discrete_Ulv *state) +{ + uint32_t voltage_response_time, ulv_voltage; + int result; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + result = pp_tables_get_response_times(hwmgr, &voltage_response_time, &ulv_voltage); + PP_ASSERT_WITH_CODE((0 == result), "can not get ULV voltage value", return result;); + + if (ulv_voltage == 0) { + data->ulv_supported = false; + return 0; + } + + if (data->voltage_control != SMU7_VOLTAGE_CONTROL_BY_SVID2) { + /* use minimum voltage if ulv voltage in pptable is bigger than minimum voltage */ + if (ulv_voltage > hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v) + state->VddcOffset = 0; + else + /* used in SMIO Mode. not implemented for now. this is backup only for CI. 
*/ + state->VddcOffset = (uint16_t)(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v - ulv_voltage); + } else { + /* use minimum voltage if ulv voltage in pptable is bigger than minimum voltage */ + if (ulv_voltage > hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v) + state->VddcOffsetVid = 0; + else /* used in SVI2 Mode */ + state->VddcOffsetVid = (uint8_t)( + (hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v - ulv_voltage) + * VOLTAGE_VID_OFFSET_SCALE2 + / VOLTAGE_VID_OFFSET_SCALE1); + } + state->VddcPhase = 1; + + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); + + return 0; +} + +static int iceland_populate_ulv_state(struct pp_hwmgr *hwmgr, + SMU71_Discrete_Ulv *ulv_level) +{ + return iceland_populate_ulv_level(hwmgr, ulv_level); +} + +static int iceland_populate_smc_link_level(struct pp_hwmgr *hwmgr, SMU71_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + uint32_t i; + + /* Index (dpm_table->pcie_speed_table.count) is reserved for PCIE boot level. */ + for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = + (uint8_t)encode_pcie_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = + 1; + table->LinkLevel[i].SPC = + (uint8_t)(data->pcie_spc_cap & 0xff); + table->LinkLevel[i].DownThreshold = + PP_HOST_TO_SMC_UL(5); + table->LinkLevel[i].UpThreshold = + PP_HOST_TO_SMC_UL(30); + } + + smu_data->smc_state_table.LinkLevelCount = + (uint8_t)dpm_table->pcie_speed_table.count; + data->dpm_level_enable_mask.pcie_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); + + return 0; +} + +static int iceland_calculate_sclk_params(struct pp_hwmgr *hwmgr, + uint32_t engine_clock, SMU71_Discrete_GraphicsLevel *sclk) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + pp_atomctrl_clock_dividers_vi dividers; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + uint32_t spll_func_cntl_4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; + uint32_t cg_spll_spread_spectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + uint32_t cg_spll_spread_spectrum_2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + uint32_t reference_clock; + uint32_t reference_divider; + uint32_t fbdiv; + int result; + + /* get the engine clock dividers for this clock value*/ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, engine_clock, ÷rs); + + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", return result); + + /* To get FBDIV we need to multiply this by 16384 and divide it by Fref.*/ + reference_clock = atomctrl_get_reference_clock(hwmgr); + + reference_divider = 1 + dividers.uc_pll_ref_div; + + /* low 14 bits is fraction and high 12 bits is divider*/ + fbdiv = dividers.ul_fb_div.ul_fb_divider & 0x3FFFFFF; + + /* SPLL_FUNC_CNTL setup*/ + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_REF_DIV, dividers.uc_pll_ref_div); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_PDIV_A, 
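In the SVI2 branch of the ULV setup above, the millivolt delta between the lowest sclk/vddc entry and the ULV voltage becomes a VID offset via the * VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1 factor, i.e. * 100 / 625, one step per 6.25 mV. A quick check with hypothetical voltages:

#include <assert.h>
#include <stdint.h>

#define VOLTAGE_VID_OFFSET_SCALE1 625
#define VOLTAGE_VID_OFFSET_SCALE2 100

int main(void)
{
        uint32_t min_vddc = 1100;   /* hypothetical lowest sclk/vddc entry, mV */
        uint32_t ulv_vddc = 1050;   /* hypothetical ULV voltage from pptable   */

        uint8_t offset_vid = (uint8_t)((min_vddc - ulv_vddc) *
                                       VOLTAGE_VID_OFFSET_SCALE2 /
                                       VOLTAGE_VID_OFFSET_SCALE1);

        assert(offset_vid == 8);    /* 50 mV delta = 8 VID steps of 6.25 mV */
        return 0;
}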
dividers.uc_pll_post_div); + + /* SPLL_FUNC_CNTL_3 setup*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, + CG_SPLL_FUNC_CNTL_3, SPLL_FB_DIV, fbdiv); + + /* set to use fractional accumulation*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, + CG_SPLL_FUNC_CNTL_3, SPLL_DITHEN, 1); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EngineSpreadSpectrumSupport)) { + pp_atomctrl_internal_ss_info ss_info; + + uint32_t vcoFreq = engine_clock * dividers.uc_pll_post_div; + if (0 == atomctrl_get_engine_clock_spread_spectrum(hwmgr, vcoFreq, &ss_info)) { + /* + * ss_info.speed_spectrum_percentage -- in unit of 0.01% + * ss_info.speed_spectrum_rate -- in unit of khz + */ + /* clks = reference_clock * 10 / (REFDIV + 1) / speed_spectrum_rate / 2 */ + uint32_t clkS = reference_clock * 5 / (reference_divider * ss_info.speed_spectrum_rate); + + /* clkv = 2 * D * fbdiv / NS */ + uint32_t clkV = 4 * ss_info.speed_spectrum_percentage * fbdiv / (clkS * 10000); + + cg_spll_spread_spectrum = + PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, CLKS, clkS); + cg_spll_spread_spectrum = + PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, SSEN, 1); + cg_spll_spread_spectrum_2 = + PHM_SET_FIELD(cg_spll_spread_spectrum_2, CG_SPLL_SPREAD_SPECTRUM_2, CLKV, clkV); + } + } + + sclk->SclkFrequency = engine_clock; + sclk->CgSpllFuncCntl3 = spll_func_cntl_3; + sclk->CgSpllFuncCntl4 = spll_func_cntl_4; + sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; + sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; + sclk->SclkDid = (uint8_t)dividers.pll_post_divider; + + return 0; +} + +static int iceland_populate_phase_value_based_on_sclk(struct pp_hwmgr *hwmgr, + const struct phm_phase_shedding_limits_table *pl, + uint32_t sclk, uint32_t *p_shed) +{ + unsigned int i; + + /* use the minimum phase shedding */ + *p_shed = 1; + + for (i = 0; i < pl->count; i++) { + if (sclk < pl->entries[i].Sclk) { + *p_shed = i; + break; + } + } + return 0; +} + +static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr, + uint32_t engine_clock, + uint16_t sclk_activity_level_threshold, + SMU71_Discrete_GraphicsLevel *graphic_level) +{ + int result; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + result = iceland_calculate_sclk_params(hwmgr, engine_clock, graphic_level); + + /* populate graphics levels*/ + result = iceland_get_dependency_volt_by_clk(hwmgr, + hwmgr->dyn_state.vddc_dependency_on_sclk, engine_clock, + &graphic_level->MinVddc); + PP_ASSERT_WITH_CODE((0 == result), + "can not find VDDC voltage value for VDDC \ + engine clock dependency table", return result); + + /* SCLK frequency in units of 10KHz*/ + graphic_level->SclkFrequency = engine_clock; + graphic_level->MinVddcPhases = 1; + + if (data->vddc_phase_shed_control) + iceland_populate_phase_value_based_on_sclk(hwmgr, + hwmgr->dyn_state.vddc_phase_shed_limits_table, + engine_clock, + &graphic_level->MinVddcPhases); + + /* Indicates maximum activity level for this performance level. 
50% for now*/ + graphic_level->ActivityLevel = sclk_activity_level_threshold; + + graphic_level->CcPwrDynRm = 0; + graphic_level->CcPwrDynRm1 = 0; + /* this level can be used if activity is high enough.*/ + graphic_level->EnabledForActivity = 0; + /* this level can be used for throttling.*/ + graphic_level->EnabledForThrottle = 1; + graphic_level->UpHyst = 0; + graphic_level->DownHyst = 100; + graphic_level->VoltageDownHyst = 0; + graphic_level->PowerThrottle = 0; + + data->display_timing.min_clock_in_sr = + hwmgr->display_config.min_core_set_clock_in_sr; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleep)) + graphic_level->DeepSleepDivId = + smu7_get_sleep_divider_id_from_clock(engine_clock, + data->display_timing.min_clock_in_sr); + + /* Default to slow, highest DPM level will be set to PPSMC_DISPLAY_WATERMARK_LOW later.*/ + graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + if (0 == result) { + graphic_level->MinVddc = PP_HOST_TO_SMC_UL(graphic_level->MinVddc * VOLTAGE_SCALE); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVddcPhases); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(graphic_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm1); + } + + return result; +} + +static int iceland_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + uint32_t level_array_adress = smu_data->smu7_data.dpm_table_start + + offsetof(SMU71_Discrete_DpmTable, GraphicsLevel); + + uint32_t level_array_size = sizeof(SMU71_Discrete_GraphicsLevel) * + SMU71_MAX_LEVELS_GRAPHICS; + + SMU71_Discrete_GraphicsLevel *levels = smu_data->smc_state_table.GraphicsLevel; + + uint32_t i; + uint8_t highest_pcie_level_enabled = 0; + uint8_t lowest_pcie_level_enabled = 0, mid_pcie_level_enabled = 0; + uint8_t count = 0; + int result = 0; + + memset(levels, 0x00, level_array_size); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + result = iceland_populate_single_graphic_level(hwmgr, + dpm_table->sclk_table.dpm_levels[i].value, + (uint16_t)smu_data->activity_target[i], + &(smu_data->smc_state_table.GraphicsLevel[i])); + if (result != 0) + return result; + + /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ + if (i > 1) + smu_data->smc_state_table.GraphicsLevel[i].DeepSleepDivId = 0; + } + + /* Only enable level 0 for now. 
*/ + smu_data->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1; + + /* set highest level watermark to high */ + if (dpm_table->sclk_table.count > 1) + smu_data->smc_state_table.GraphicsLevel[dpm_table->sclk_table.count-1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + smu_data->smc_state_table.GraphicsDpmLevelCount = + (uint8_t)dpm_table->sclk_table.count; + data->dpm_level_enable_mask.sclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + while ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (highest_pcie_level_enabled + 1))) != 0) { + highest_pcie_level_enabled++; + } + + while ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << lowest_pcie_level_enabled)) == 0) { + lowest_pcie_level_enabled++; + } + + while ((count < highest_pcie_level_enabled) && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) { + count++; + } + + mid_pcie_level_enabled = (lowest_pcie_level_enabled+1+count) < highest_pcie_level_enabled ? + (lowest_pcie_level_enabled+1+count) : highest_pcie_level_enabled; + + + /* set pcieDpmLevel to highest_pcie_level_enabled*/ + for (i = 2; i < dpm_table->sclk_table.count; i++) { + smu_data->smc_state_table.GraphicsLevel[i].pcieDpmLevel = highest_pcie_level_enabled; + } + + /* set pcieDpmLevel to lowest_pcie_level_enabled*/ + smu_data->smc_state_table.GraphicsLevel[0].pcieDpmLevel = lowest_pcie_level_enabled; + + /* set pcieDpmLevel to mid_pcie_level_enabled*/ + smu_data->smc_state_table.GraphicsLevel[1].pcieDpmLevel = mid_pcie_level_enabled; + + /* level count will send to smc once at init smc table and never change*/ + result = smu7_copy_bytes_to_smc(hwmgr, level_array_adress, + (uint8_t *)levels, (uint32_t)level_array_size, + SMC_RAM_END); + + return result; +} + +static int iceland_calculate_mclk_params( + struct pp_hwmgr *hwmgr, + uint32_t memory_clock, + SMU71_Discrete_MemoryLevel *mclk, + bool strobe_mode, + bool dllStateOn + ) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; + uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; + uint32_t mpll_ad_func_cntl = data->clock_registers.vMPLL_AD_FUNC_CNTL; + uint32_t mpll_dq_func_cntl = data->clock_registers.vMPLL_DQ_FUNC_CNTL; + uint32_t mpll_func_cntl = data->clock_registers.vMPLL_FUNC_CNTL; + uint32_t mpll_func_cntl_1 = data->clock_registers.vMPLL_FUNC_CNTL_1; + uint32_t mpll_func_cntl_2 = data->clock_registers.vMPLL_FUNC_CNTL_2; + uint32_t mpll_ss1 = data->clock_registers.vMPLL_SS1; + uint32_t mpll_ss2 = data->clock_registers.vMPLL_SS2; + + pp_atomctrl_memory_clock_param mpll_param; + int result; + + result = atomctrl_get_memory_pll_dividers_si(hwmgr, + memory_clock, &mpll_param, strobe_mode); + PP_ASSERT_WITH_CODE(0 == result, + "Error retrieving Memory Clock Parameters from VBIOS.", return result); + + /* MPLL_FUNC_CNTL setup*/ + mpll_func_cntl = PHM_SET_FIELD(mpll_func_cntl, MPLL_FUNC_CNTL, BWCTRL, mpll_param.bw_ctrl); + + /* MPLL_FUNC_CNTL_1 setup*/ + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, CLKF, mpll_param.mpll_fb_divider.cl_kf); + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, CLKFRAC, mpll_param.mpll_fb_divider.clk_frac); + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, VCO_MODE, mpll_param.vco_mode); + + /* MPLL_AD_FUNC_CNTL setup*/ + mpll_ad_func_cntl = PHM_SET_FIELD(mpll_ad_func_cntl, + MPLL_AD_FUNC_CNTL, 
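The three while loops above walk pcie_dpm_enable_mask bit by bit to find the lowest, highest and a middle enabled PCIe DPM level; graphics level 0 is then pinned to the lowest, level 1 to the middle, and every other level to the highest. A standalone rerun of that scan for a hypothetical mask with levels 0 to 2 enabled:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint32_t mask = 0x7;   /* hypothetical: PCIe DPM levels 0, 1, 2 enabled */
        uint8_t highest = 0, lowest = 0, mid, count = 0;

        while ((mask & (1u << (highest + 1))) != 0)   /* climb while next bit set */
                highest++;
        while ((mask & (1u << lowest)) == 0)          /* skip disabled low levels */
                lowest++;
        while ((count < highest) &&
               ((mask & (1u << (lowest + 1 + count))) == 0))
                count++;
        mid = (lowest + 1 + count) < highest ? (lowest + 1 + count) : highest;

        assert(lowest == 0 && mid == 1 && highest == 2);
        return 0;
}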
YCLK_POST_DIV, mpll_param.mpll_post_divider); + + if (data->is_memory_gddr5) { + /* MPLL_DQ_FUNC_CNTL setup*/ + mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, + MPLL_DQ_FUNC_CNTL, YCLK_SEL, mpll_param.yclk_sel); + mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, + MPLL_DQ_FUNC_CNTL, YCLK_POST_DIV, mpll_param.mpll_post_divider); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MemorySpreadSpectrumSupport)) { + /* + ************************************ + Fref = Reference Frequency + NF = Feedback divider ratio + NR = Reference divider ratio + Fnom = Nominal VCO output frequency = Fref * NF / NR + Fs = Spreading Rate + D = Percentage down-spread / 2 + Fint = Reference input frequency to PFD = Fref / NR + NS = Spreading rate divider ratio = int(Fint / (2 * Fs)) + CLKS = NS - 1 = ISS_STEP_NUM[11:0] + NV = D * Fs / Fnom * 4 * ((Fnom/Fref * NR) ^ 2) + CLKV = 65536 * NV = ISS_STEP_SIZE[25:0] + ************************************* + */ + pp_atomctrl_internal_ss_info ss_info; + uint32_t freq_nom; + uint32_t tmp; + uint32_t reference_clock = atomctrl_get_mpll_reference_clock(hwmgr); + + /* for GDDR5 for all modes and DDR3 */ + if (1 == mpll_param.qdr) + freq_nom = memory_clock * 4 * (1 << mpll_param.mpll_post_divider); + else + freq_nom = memory_clock * 2 * (1 << mpll_param.mpll_post_divider); + + /* tmp = (freq_nom / reference_clock * reference_divider) ^ 2 Note: S.I. reference_divider = 1*/ + tmp = (freq_nom / reference_clock); + tmp = tmp * tmp; + + if (0 == atomctrl_get_memory_clock_spread_spectrum(hwmgr, freq_nom, &ss_info)) { + /* ss_info.speed_spectrum_percentage -- in unit of 0.01% */ + /* ss.Info.speed_spectrum_rate -- in unit of khz */ + /* CLKS = reference_clock / (2 * speed_spectrum_rate * reference_divider) * 10 */ + /* = reference_clock * 5 / speed_spectrum_rate */ + uint32_t clks = reference_clock * 5 / ss_info.speed_spectrum_rate; + + /* CLKV = 65536 * speed_spectrum_percentage / 2 * spreadSpecrumRate / freq_nom * 4 / 100000 * ((freq_nom / reference_clock) ^ 2) */ + /* = 131 * speed_spectrum_percentage * speed_spectrum_rate / 100 * ((freq_nom / reference_clock) ^ 2) / freq_nom */ + uint32_t clkv = + (uint32_t)((((131 * ss_info.speed_spectrum_percentage * + ss_info.speed_spectrum_rate) / 100) * tmp) / freq_nom); + + mpll_ss1 = PHM_SET_FIELD(mpll_ss1, MPLL_SS1, CLKV, clkv); + mpll_ss2 = PHM_SET_FIELD(mpll_ss2, MPLL_SS2, CLKS, clks); + } + } + + /* MCLK_PWRMGT_CNTL setup */ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, DLL_SPEED, mpll_param.dll_speed); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_PDNB, dllStateOn); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_PDNB, dllStateOn); + + + /* Save the result data to outpupt memory level structure */ + mclk->MclkFrequency = memory_clock; + mclk->MpllFuncCntl = mpll_func_cntl; + mclk->MpllFuncCntl_1 = mpll_func_cntl_1; + mclk->MpllFuncCntl_2 = mpll_func_cntl_2; + mclk->MpllAdFuncCntl = mpll_ad_func_cntl; + mclk->MpllDqFuncCntl = mpll_dq_func_cntl; + mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; + mclk->DllCntl = dll_cntl; + mclk->MpllSs1 = mpll_ss1; + mclk->MpllSs2 = mpll_ss2; + + return 0; +} + +static uint8_t iceland_get_mclk_frequency_ratio(uint32_t memory_clock, + bool strobe_mode) +{ + uint8_t mc_para_index; + + if (strobe_mode) { + if (memory_clock < 12500) { + mc_para_index = 0x00; + } else if (memory_clock > 47500) { + mc_para_index = 0x0f; + } else { + mc_para_index = (uint8_t)((memory_clock - 
10000) / 2500); + } + } else { + if (memory_clock < 65000) { + mc_para_index = 0x00; + } else if (memory_clock > 135000) { + mc_para_index = 0x0f; + } else { + mc_para_index = (uint8_t)((memory_clock - 60000) / 5000); + } + } + + return mc_para_index; +} + +static uint8_t iceland_get_ddr3_mclk_frequency_ratio(uint32_t memory_clock) +{ + uint8_t mc_para_index; + + if (memory_clock < 10000) { + mc_para_index = 0; + } else if (memory_clock >= 80000) { + mc_para_index = 0x0f; + } else { + mc_para_index = (uint8_t)((memory_clock - 10000) / 5000 + 1); + } + + return mc_para_index; +} + +static int iceland_populate_phase_value_based_on_mclk(struct pp_hwmgr *hwmgr, const struct phm_phase_shedding_limits_table *pl, + uint32_t memory_clock, uint32_t *p_shed) +{ + unsigned int i; + + *p_shed = 1; + + for (i = 0; i < pl->count; i++) { + if (memory_clock < pl->entries[i].Mclk) { + *p_shed = i; + break; + } + } + + return 0; +} + +static int iceland_populate_single_memory_level( + struct pp_hwmgr *hwmgr, + uint32_t memory_clock, + SMU71_Discrete_MemoryLevel *memory_level + ) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + int result = 0; + bool dll_state_on; + struct cgs_display_info info = {0}; + uint32_t mclk_edc_wr_enable_threshold = 40000; + uint32_t mclk_edc_enable_threshold = 40000; + uint32_t mclk_strobe_mode_threshold = 40000; + + if (hwmgr->dyn_state.vddc_dependency_on_mclk != NULL) { + result = iceland_get_dependency_volt_by_clk(hwmgr, + hwmgr->dyn_state.vddc_dependency_on_mclk, memory_clock, &memory_level->MinVddc); + PP_ASSERT_WITH_CODE((0 == result), + "can not find MinVddc voltage value from memory VDDC voltage dependency table", return result); + } + + if (data->vddci_control == SMU7_VOLTAGE_CONTROL_NONE) { + memory_level->MinVddci = memory_level->MinVddc; + } else if (NULL != hwmgr->dyn_state.vddci_dependency_on_mclk) { + result = iceland_get_dependency_volt_by_clk(hwmgr, + hwmgr->dyn_state.vddci_dependency_on_mclk, + memory_clock, + &memory_level->MinVddci); + PP_ASSERT_WITH_CODE((0 == result), + "can not find MinVddci voltage value from memory VDDCI voltage dependency table", return result); + } + + memory_level->MinVddcPhases = 1; + + if (data->vddc_phase_shed_control) { + iceland_populate_phase_value_based_on_mclk(hwmgr, hwmgr->dyn_state.vddc_phase_shed_limits_table, + memory_clock, &memory_level->MinVddcPhases); + } + + memory_level->EnabledForThrottle = 1; + memory_level->EnabledForActivity = 0; + memory_level->UpHyst = 0; + memory_level->DownHyst = 100; + memory_level->VoltageDownHyst = 0; + + /* Indicates maximum activity level for this performance level.*/ + memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + memory_level->StutterEnable = 0; + memory_level->StrobeEnable = 0; + memory_level->EdcReadEnable = 0; + memory_level->EdcWriteEnable = 0; + memory_level->RttEnable = 0; + + /* default set to low watermark. 
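iceland_get_mclk_frequency_ratio() above maps the memory clock (10 kHz units) onto a 4-bit index: in strobe mode the band runs from 125 MHz to 475 MHz in 25 MHz steps, otherwise from 650 MHz to 1350 MHz in 50 MHz steps, clamped to 0x0..0xf at both ends. A worked example for the strobe-mode branch only:

#include <assert.h>
#include <stdint.h>

static uint8_t strobe_ratio(uint32_t mclk)   /* mclk in 10 kHz units */
{
        if (mclk < 12500)
                return 0x00;
        if (mclk > 47500)
                return 0x0f;
        return (uint8_t)((mclk - 10000) / 2500);
}

int main(void)
{
        assert(strobe_ratio(30000) == 8);    /* 300 MHz -> index 8  */
        assert(strobe_ratio(47500) == 0x0f); /* top of the band     */
        assert(strobe_ratio(50000) == 0x0f); /* clamped above band  */
        return 0;
}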
Highest level will be set to high later.*/ + memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + cgs_get_active_displays_info(hwmgr->device, &info); + data->display_timing.num_existing_displays = info.display_count; + + /* stutter mode not support on iceland */ + + /* decide strobe mode*/ + memory_level->StrobeEnable = (mclk_strobe_mode_threshold != 0) && + (memory_clock <= mclk_strobe_mode_threshold); + + /* decide EDC mode and memory clock ratio*/ + if (data->is_memory_gddr5) { + memory_level->StrobeRatio = iceland_get_mclk_frequency_ratio(memory_clock, + memory_level->StrobeEnable); + + if ((mclk_edc_enable_threshold != 0) && + (memory_clock > mclk_edc_enable_threshold)) { + memory_level->EdcReadEnable = 1; + } + + if ((mclk_edc_wr_enable_threshold != 0) && + (memory_clock > mclk_edc_wr_enable_threshold)) { + memory_level->EdcWriteEnable = 1; + } + + if (memory_level->StrobeEnable) { + if (iceland_get_mclk_frequency_ratio(memory_clock, 1) >= + ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC7) >> 16) & 0xf)) + dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 1 : 0; + else + dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC6) >> 1) & 0x1) ? 1 : 0; + } else + dll_state_on = data->dll_default_on; + } else { + memory_level->StrobeRatio = + iceland_get_ddr3_mclk_frequency_ratio(memory_clock); + dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 1 : 0; + } + + result = iceland_calculate_mclk_params(hwmgr, + memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on); + + if (0 == result) { + memory_level->MinVddc = PP_HOST_TO_SMC_UL(memory_level->MinVddc * VOLTAGE_SCALE); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MinVddcPhases); + memory_level->MinVddci = PP_HOST_TO_SMC_UL(memory_level->MinVddci * VOLTAGE_SCALE); + memory_level->MinMvdd = PP_HOST_TO_SMC_UL(memory_level->MinMvdd * VOLTAGE_SCALE); + /* MCLK frequency in units of 10KHz*/ + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkFrequency); + /* Indicates maximum activity level for this performance level.*/ + CONVERT_FROM_HOST_TO_SMC_US(memory_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_1); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_2); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllAdFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllDqFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkPwrmgtCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->DllCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs1); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs2); + } + + return result; +} + +static int iceland_populate_all_memory_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + int result; + + /* populate MCLK dpm table to SMU7 */ + uint32_t level_array_adress = smu_data->smu7_data.dpm_table_start + offsetof(SMU71_Discrete_DpmTable, MemoryLevel); + uint32_t level_array_size = sizeof(SMU71_Discrete_MemoryLevel) * SMU71_MAX_LEVELS_MEMORY; + SMU71_Discrete_MemoryLevel *levels = smu_data->smc_state_table.MemoryLevel; + uint32_t i; + + memset(levels, 0x00, level_array_size); + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), + "can not populate memory level as 
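All three thresholds in iceland_populate_single_memory_level() are hard-coded to 40000 (400 MHz in these 10 kHz units), so strobe mode is enabled only at or below 400 MHz and, on the GDDR5 path, EDC read/write are enabled only above it. A compact restatement of just that decision logic with hypothetical clocks:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct mem_level_flags { bool strobe, edc_read, edc_write; };

static struct mem_level_flags classify_mclk(uint32_t mclk /* 10 kHz units */)
{
        const uint32_t strobe_thr = 40000, edc_rd_thr = 40000, edc_wr_thr = 40000;
        struct mem_level_flags f;

        f.strobe    = (strobe_thr != 0) && (mclk <= strobe_thr);
        f.edc_read  = (edc_rd_thr != 0) && (mclk > edc_rd_thr);
        f.edc_write = (edc_wr_thr != 0) && (mclk > edc_wr_thr);
        return f;
}

int main(void)
{
        assert(classify_mclk(30000).strobe  && !classify_mclk(30000).edc_read);
        assert(!classify_mclk(50000).strobe &&  classify_mclk(50000).edc_write);
        return 0;
}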
memory clock is zero", return -EINVAL); + result = iceland_populate_single_memory_level(hwmgr, dpm_table->mclk_table.dpm_levels[i].value, + &(smu_data->smc_state_table.MemoryLevel[i])); + if (0 != result) { + return result; + } + } + + /* Only enable level 0 for now.*/ + smu_data->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; + + /* + * in order to prevent MC activity from stutter mode to push DPM up. + * the UVD change complements this by putting the MCLK in a higher state + * by default such that we are not effected by up threshold or and MCLK DPM latency. + */ + smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); + + smu_data->smc_state_table.MemoryDpmLevelCount = (uint8_t)dpm_table->mclk_table.count; + data->dpm_level_enable_mask.mclk_dpm_enable_mask = phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + /* set highest level watermark to high*/ + smu_data->smc_state_table.MemoryLevel[dpm_table->mclk_table.count-1].DisplayWatermark = PPSMC_DISPLAY_WATERMARK_HIGH; + + /* level count will send to smc once at init smc table and never change*/ + result = smu7_copy_bytes_to_smc(hwmgr, + level_array_adress, (uint8_t *)levels, (uint32_t)level_array_size, + SMC_RAM_END); + + return result; +} + +static int iceland_populate_mvdd_value(struct pp_hwmgr *hwmgr, uint32_t mclk, + SMU71_Discrete_VoltageLevel *voltage) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + uint32_t i = 0; + + if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { + /* find mvdd value which clock is more than request */ + for (i = 0; i < hwmgr->dyn_state.mvdd_dependency_on_mclk->count; i++) { + if (mclk <= hwmgr->dyn_state.mvdd_dependency_on_mclk->entries[i].clk) { + /* Always round to higher voltage. */ + voltage->Voltage = data->mvdd_voltage_table.entries[i].value; + break; + } + } + + PP_ASSERT_WITH_CODE(i < hwmgr->dyn_state.mvdd_dependency_on_mclk->count, + "MVDD Voltage is outside the supported range.", return -EINVAL); + + } else { + return -EINVAL; + } + + return 0; +} + +static int iceland_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + int result = 0; + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct pp_atomctrl_clock_dividers_vi dividers; + uint32_t vddc_phase_shed_control = 0; + + SMU71_Discrete_VoltageLevel voltage_level; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_2 = data->clock_registers.vCG_SPLL_FUNC_CNTL_2; + uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; + uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; + + + /* The ACPI state should not do DPM on DC (or ever).*/ + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + if (data->acpi_vddc) + table->ACPILevel.MinVddc = PP_HOST_TO_SMC_UL(data->acpi_vddc * VOLTAGE_SCALE); + else + table->ACPILevel.MinVddc = PP_HOST_TO_SMC_UL(data->min_vddc_in_pptable * VOLTAGE_SCALE); + + table->ACPILevel.MinVddcPhases = vddc_phase_shed_control ? 
0 : 1; + /* assign zero for now*/ + table->ACPILevel.SclkFrequency = atomctrl_get_reference_clock(hwmgr); + + /* get the engine clock dividers for this clock value*/ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, + table->ACPILevel.SclkFrequency, &dividers); + + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", return result); + + /* divider ID for required SCLK*/ + table->ACPILevel.SclkDid = (uint8_t)dividers.pll_post_divider; + table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + table->ACPILevel.DeepSleepDivId = 0; + + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_PWRON, 0); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_RESET, 1); + spll_func_cntl_2 = PHM_SET_FIELD(spll_func_cntl_2, + CG_SPLL_FUNC_CNTL_2, SCLK_MUX_SEL, 4); + + table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; + table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; + table->ACPILevel.CgSpllFuncCntl3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + table->ACPILevel.CgSpllFuncCntl4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; + table->ACPILevel.SpllSpreadSpectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + table->ACPILevel.SpllSpreadSpectrum2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + + /* For various features to be enabled/disabled while this level is active.*/ + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); + /* SCLK frequency in units of 10KHz*/ + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); + + /* table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases;*/ + table->MemoryACPILevel.MinVddc = table->ACPILevel.MinVddc; + table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases; + + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + table->MemoryACPILevel.MinVddci = table->MemoryACPILevel.MinVddc; + else { + if (data->acpi_vddci != 0) + table->MemoryACPILevel.MinVddci = PP_HOST_TO_SMC_UL(data->acpi_vddci * VOLTAGE_SCALE); + else + table->MemoryACPILevel.MinVddci = PP_HOST_TO_SMC_UL(data->min_vddci_in_pptable * VOLTAGE_SCALE); + } + + if (0 == iceland_populate_mvdd_value(hwmgr, 0, &voltage_level)) + table->MemoryACPILevel.MinMvdd = + PP_HOST_TO_SMC_UL(voltage_level.Voltage * VOLTAGE_SCALE); + else + table->MemoryACPILevel.MinMvdd = 0; + + /* Force reset on DLL*/ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_RESET, 0x1); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_RESET, 0x1); + + /* Disable DLL in ACPIState*/ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_PDNB, 0); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_PDNB, 0); + + /* Enable DLL bypass signal*/ + dll_cntl = PHM_SET_FIELD(dll_cntl, + DLL_CNTL, MRDCK0_BYPASS, 0); + dll_cntl = PHM_SET_FIELD(dll_cntl, + DLL_CNTL, MRDCK1_BYPASS, 0); + + table->MemoryACPILevel.DllCntl = + 
PP_HOST_TO_SMC_UL(dll_cntl); + table->MemoryACPILevel.MclkPwrmgtCntl = + PP_HOST_TO_SMC_UL(mclk_pwrmgt_cntl); + table->MemoryACPILevel.MpllAdFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_AD_FUNC_CNTL); + table->MemoryACPILevel.MpllDqFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_DQ_FUNC_CNTL); + table->MemoryACPILevel.MpllFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL); + table->MemoryACPILevel.MpllFuncCntl_1 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_1); + table->MemoryACPILevel.MpllFuncCntl_2 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_2); + table->MemoryACPILevel.MpllSs1 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS1); + table->MemoryACPILevel.MpllSs2 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS2); + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpHyst = 0; + table->MemoryACPILevel.DownHyst = 100; + table->MemoryACPILevel.VoltageDownHyst = 0; + /* Indicates maximum activity level for this performance level.*/ + table->MemoryACPILevel.ActivityLevel = PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + + table->MemoryACPILevel.StutterEnable = 0; + table->MemoryACPILevel.StrobeEnable = 0; + table->MemoryACPILevel.EdcReadEnable = 0; + table->MemoryACPILevel.EdcWriteEnable = 0; + table->MemoryACPILevel.RttEnable = 0; + + return result; +} + +static int iceland_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + +static int iceland_populate_smc_vce_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + +static int iceland_populate_smc_acp_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + +static int iceland_populate_smc_samu_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + +static int iceland_populate_memory_timing_parameters( + struct pp_hwmgr *hwmgr, + uint32_t engine_clock, + uint32_t memory_clock, + struct SMU71_Discrete_MCArbDramTimingTableEntry *arb_regs + ) +{ + uint32_t dramTiming; + uint32_t dramTiming2; + uint32_t burstTime; + int result; + + result = atomctrl_set_engine_dram_timings_rv770(hwmgr, + engine_clock, memory_clock); + + PP_ASSERT_WITH_CODE(result == 0, + "Error calling VBIOS to set DRAM_TIMING.", return result); + + dramTiming = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); + dramTiming2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); + burstTime = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); + + arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dramTiming); + arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dramTiming2); + arb_regs->McArbBurstTime = (uint8_t)burstTime; + + return 0; +} + +static int iceland_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + int result = 0; + SMU71_Discrete_MCArbDramTimingTable arb_regs; + uint32_t i, j; + + memset(&arb_regs, 0x00, sizeof(SMU71_Discrete_MCArbDramTimingTable)); + + for (i = 0; i < data->dpm_table.sclk_table.count; i++) { + for (j = 0; j < data->dpm_table.mclk_table.count; j++) { + result = iceland_populate_memory_timing_parameters + (hwmgr, data->dpm_table.sclk_table.dpm_levels[i].value, + data->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + + if (0 != result) { + break; + } + } + } + + if (0 
== result) { + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.arb_table_start, + (uint8_t *)&arb_regs, + sizeof(SMU71_Discrete_MCArbDramTimingTable), + SMC_RAM_END + ); + } + + return result; +} + +static int iceland_populate_smc_boot_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + int result = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + /* find boot level from dpm table*/ + result = phm_find_boot_level(&(data->dpm_table.sclk_table), + data->vbios_boot_state.sclk_bootup_value, + (uint32_t *)&(smu_data->smc_state_table.GraphicsBootLevel)); + + if (0 != result) { + smu_data->smc_state_table.GraphicsBootLevel = 0; + pr_err("VBIOS did not find boot engine clock value \ + in dependency table. Using Graphics DPM level 0!"); + result = 0; + } + + result = phm_find_boot_level(&(data->dpm_table.mclk_table), + data->vbios_boot_state.mclk_bootup_value, + (uint32_t *)&(smu_data->smc_state_table.MemoryBootLevel)); + + if (0 != result) { + smu_data->smc_state_table.MemoryBootLevel = 0; + pr_err("VBIOS did not find boot engine clock value \ + in dependency table. Using Memory DPM level 0!"); + result = 0; + } + + table->BootVddc = data->vbios_boot_state.vddc_bootup_value; + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + table->BootVddci = table->BootVddc; + else + table->BootVddci = data->vbios_boot_state.vddci_bootup_value; + + table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value; + + return result; +} + +static int iceland_populate_mc_reg_address(struct pp_hwmgr *hwmgr, + SMU71_Discrete_MCRegisters *mc_reg_table) +{ + const struct iceland_smumgr *smu_data = (struct iceland_smumgr *)hwmgr->smu_backend; + + uint32_t i, j; + + for (i = 0, j = 0; j < smu_data->mc_reg_table.last; j++) { + if (smu_data->mc_reg_table.validflag & 1<<j) { + PP_ASSERT_WITH_CODE(i < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE, + "Index of mc_reg_table->address[] array out of boundary", return -EINVAL); + mc_reg_table->address[i].s0 = + PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s0); + mc_reg_table->address[i].s1 = + PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s1); + i++; + } + } + + mc_reg_table->last = (uint8_t)i; + + return 0; +} + +/*convert register values from driver to SMC format */ +static void iceland_convert_mc_registers( + const struct iceland_mc_reg_entry *entry, + SMU71_Discrete_MCRegisterSet *data, + uint32_t num_entries, uint32_t valid_flag) +{ + uint32_t i, j; + + for (i = 0, j = 0; j < num_entries; j++) { + if (valid_flag & 1<<j) { + data->value[i] = PP_HOST_TO_SMC_UL(entry->mc_data[j]); + i++; + } + } +} + +static int iceland_convert_mc_reg_table_entry_to_smc(struct pp_hwmgr *hwmgr, + const uint32_t memory_clock, + SMU71_Discrete_MCRegisterSet *mc_reg_table_data + ) +{ + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + uint32_t i = 0; + + for (i = 0; i < smu_data->mc_reg_table.num_entries; i++) { + if (memory_clock <= + smu_data->mc_reg_table.mc_reg_table_entry[i].mclk_max) { + break; + } + } + + if ((i == smu_data->mc_reg_table.num_entries) && (i > 0)) + --i; + + iceland_convert_mc_registers(&smu_data->mc_reg_table.mc_reg_table_entry[i], + mc_reg_table_data, smu_data->mc_reg_table.last, + smu_data->mc_reg_table.validflag); + + return 0; +} + +static int iceland_convert_mc_reg_table_to_smc(struct pp_hwmgr *hwmgr, + 
SMU71_Discrete_MCRegisters *mc_regs) +{ + int result = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + int res; + uint32_t i; + + for (i = 0; i < data->dpm_table.mclk_table.count; i++) { + res = iceland_convert_mc_reg_table_entry_to_smc( + hwmgr, + data->dpm_table.mclk_table.dpm_levels[i].value, + &mc_regs->data[i] + ); + + if (0 != res) + result = res; + } + + return result; +} + +static int iceland_update_and_upload_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t address; + int32_t result; + + if (0 == (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) + return 0; + + + memset(&smu_data->mc_regs, 0, sizeof(SMU71_Discrete_MCRegisters)); + + result = iceland_convert_mc_reg_table_to_smc(hwmgr, &(smu_data->mc_regs)); + + if (result != 0) + return result; + + + address = smu_data->smu7_data.mc_reg_table_start + (uint32_t)offsetof(SMU71_Discrete_MCRegisters, data[0]); + + return smu7_copy_bytes_to_smc(hwmgr, address, + (uint8_t *)&smu_data->mc_regs.data[0], + sizeof(SMU71_Discrete_MCRegisterSet) * data->dpm_table.mclk_table.count, + SMC_RAM_END); +} + +static int iceland_populate_initial_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + + memset(&smu_data->mc_regs, 0x00, sizeof(SMU71_Discrete_MCRegisters)); + result = iceland_populate_mc_reg_address(hwmgr, &(smu_data->mc_regs)); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize MCRegTable for the MC register addresses!", return result;); + + result = iceland_convert_mc_reg_table_to_smc(hwmgr, &smu_data->mc_regs); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize MCRegTable for driver state!", return result;); + + return smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.mc_reg_table_start, + (uint8_t *)&smu_data->mc_regs, sizeof(SMU71_Discrete_MCRegisters), SMC_RAM_END); +} + +static int iceland_populate_smc_initial_state(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + uint8_t count, level; + + count = (uint8_t)(hwmgr->dyn_state.vddc_dependency_on_sclk->count); + + for (level = 0; level < count; level++) { + if (hwmgr->dyn_state.vddc_dependency_on_sclk->entries[level].clk + >= data->vbios_boot_state.sclk_bootup_value) { + smu_data->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + count = (uint8_t)(hwmgr->dyn_state.vddc_dependency_on_mclk->count); + + for (level = 0; level < count; level++) { + if (hwmgr->dyn_state.vddc_dependency_on_mclk->entries[level].clk + >= data->vbios_boot_state.mclk_bootup_value) { + smu_data->smc_state_table.MemoryBootLevel = level; + break; + } + } + + return 0; +} + +static int iceland_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + const struct iceland_pt_defaults *defaults = smu_data->power_tune_defaults; + SMU71_Discrete_DpmTable *dpm_table = &(smu_data->smc_state_table); + struct phm_cac_tdp_table *cac_dtp_table = hwmgr->dyn_state.cac_dtp_table; + struct phm_ppm_table *ppm = hwmgr->dyn_state.ppm_parameter_table; + const uint16_t *def1, *def2; + int i, j, k; + + + /* + * TDP number of fraction bits are changed from 8 
to 7 for Iceland + * as requested by SMC team + */ + + dpm_table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 256)); + dpm_table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usConfigurableTDP * 256)); + + + dpm_table->DTETjOffset = 0; + + dpm_table->GpuTjMax = (uint8_t)(data->thermal_temp_setting.temperature_high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES); + dpm_table->GpuTjHyst = 8; + + dpm_table->DTEAmbientTempBase = defaults->dte_ambient_temp_base; + + /* The following are for new Iceland Multi-input fan/thermal control */ + if (NULL != ppm) { + dpm_table->PPM_PkgPwrLimit = (uint16_t)ppm->dgpu_tdp * 256 / 1000; + dpm_table->PPM_TemperatureLimit = (uint16_t)ppm->tj_max * 256; + } else { + dpm_table->PPM_PkgPwrLimit = 0; + dpm_table->PPM_TemperatureLimit = 0; + } + + CONVERT_FROM_HOST_TO_SMC_US(dpm_table->PPM_PkgPwrLimit); + CONVERT_FROM_HOST_TO_SMC_US(dpm_table->PPM_TemperatureLimit); + + dpm_table->BAPM_TEMP_GRADIENT = PP_HOST_TO_SMC_UL(defaults->bapm_temp_gradient); + def1 = defaults->bapmti_r; + def2 = defaults->bapmti_rc; + + for (i = 0; i < SMU71_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU71_DTE_SOURCES; j++) { + for (k = 0; k < SMU71_DTE_SINKS; k++) { + dpm_table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*def1); + dpm_table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*def2); + def1++; + def2++; + } + } + } + + return 0; +} + +static int iceland_populate_smc_svi2_config(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *tab) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) + tab->SVI2Enable |= VDDC_ON_SVI2; + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + tab->SVI2Enable |= VDDCI_ON_SVI2; + else + tab->MergedVddci = 1; + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) + tab->SVI2Enable |= MVDD_ON_SVI2; + + PP_ASSERT_WITH_CODE(tab->SVI2Enable != (VDDC_ON_SVI2 | VDDCI_ON_SVI2 | MVDD_ON_SVI2) && + (tab->SVI2Enable & VDDC_ON_SVI2), "SVI2 domain configuration is incorrect!", return -EINVAL); + + return 0; +} + +static int iceland_init_smc_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + SMU71_Discrete_DpmTable *table = &(smu_data->smc_state_table); + + + iceland_initialize_power_tune_defaults(hwmgr); + memset(&(smu_data->smc_state_table), 0x00, sizeof(smu_data->smc_state_table)); + + if (SMU7_VOLTAGE_CONTROL_NONE != data->voltage_control) { + iceland_populate_smc_voltage_tables(hwmgr, table); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StepVddc)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (data->is_memory_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + + if (data->ulv_supported) { + result = iceland_populate_ulv_state(hwmgr, &(smu_data->ulv_setting)); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ULV state!", return result;); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_ULV_PARAMETER, 0x40035); + } + + result = iceland_populate_smc_link_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Link Level!", return result;); + + result = iceland_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to 
initialize Graphics Level!", return result;); + + result = iceland_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Memory Level!", return result;); + + result = iceland_populate_smc_acpi_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ACPI Level!", return result;); + + result = iceland_populate_smc_vce_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize VCE Level!", return result;); + + result = iceland_populate_smc_acp_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ACP Level!", return result;); + + result = iceland_populate_smc_samu_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize SAMU Level!", return result;); + + /* Since only the initial state is completely set up at this point (the other states are just copies of the boot state) we only */ + /* need to populate the ARB settings for the initial state. */ + result = iceland_program_memory_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to Write ARB settings for the initial state.", return result;); + + result = iceland_populate_smc_uvd_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize UVD Level!", return result;); + + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + result = iceland_populate_smc_boot_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Boot Level!", return result;); + + result = iceland_populate_smc_initial_state(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, "Failed to initialize Boot State!", return result); + + result = iceland_populate_bapm_parameters_in_dpm_table(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, "Failed to populate BAPM Parameters!", return result); + + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + + table->TemperatureLimitHigh = + (data->thermal_temp_setting.temperature_high * + SMU7_Q88_FORMAT_CONVERSION_UNIT) / PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + table->TemperatureLimitLow = + (data->thermal_temp_setting.temperature_low * + SMU7_Q88_FORMAT_CONVERSION_UNIT) / PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + table->PCIeBootLinkLevel = 0; + table->PCIeGenInterval = 1; + + result = iceland_populate_smc_svi2_config(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate SVI2 setting!", return result); + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcVid); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcPhase); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddciVid); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskMvddVid); + CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); + CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); + CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); + + table->BootVddc = PP_HOST_TO_SMC_US(table->BootVddc * VOLTAGE_SCALE); + table->BootVddci = PP_HOST_TO_SMC_US(table->BootVddci * VOLTAGE_SCALE); + table->BootMVdd = PP_HOST_TO_SMC_US(table->BootMVdd * VOLTAGE_SCALE); + + /* Upload 
all dpm data to SMC memory.(dpm level, dpm level count etc) */ + result = smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.dpm_table_start + + offsetof(SMU71_Discrete_DpmTable, SystemFlags), + (uint8_t *)&(table->SystemFlags), + sizeof(SMU71_Discrete_DpmTable)-3 * sizeof(SMU71_PIDController), + SMC_RAM_END); + + PP_ASSERT_WITH_CODE(0 == result, + "Failed to upload dpm data to SMC memory!", return result;); + + /* Upload all ulv setting to SMC memory.(dpm level, dpm level count etc) */ + result = smu7_copy_bytes_to_smc(hwmgr, + smu_data->smu7_data.ulv_setting_starts, + (uint8_t *)&(smu_data->ulv_setting), + sizeof(SMU71_Discrete_Ulv), + SMC_RAM_END); + + + result = iceland_populate_initial_mc_reg_table(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to populate initialize MC Reg table!", return result); + + result = iceland_populate_pm_fuses(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate PM fuses to SMC memory!", return result); + + return 0; +} + +int iceland_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) +{ + struct smu7_smumgr *smu7_data = (struct smu7_smumgr *)(hwmgr->smu_backend); + SMU71_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; + uint32_t duty100; + uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; + uint16_t fdo_min, slope1, slope2; + uint32_t reference_clock; + int res; + uint64_t tmp64; + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) + return 0; + + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + if (0 == smu7_data->fan_table_start) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL1, FMAX_DUTY100); + + if (0 == duty100) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin * duty100; + do_div(tmp64, 10000); + fdo_min = (uint16_t)tmp64; + + t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - hwmgr->thermal_controller.advanceFanControlParameters.usTMin; + t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - hwmgr->thermal_controller.advanceFanControlParameters.usTMed; + + pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; + pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; + + slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); + slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); + + fan_table.TempMin = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMin) / 100); + fan_table.TempMed = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMed) / 100); + fan_table.TempMax = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMax) / 100); + + fan_table.Slope1 = cpu_to_be16(slope1); + fan_table.Slope2 = cpu_to_be16(slope2); + + fan_table.FdoMin = cpu_to_be16(fdo_min); + + fan_table.HystDown = cpu_to_be16(hwmgr->thermal_controller.advanceFanControlParameters.ucTHyst); + + fan_table.HystUp = cpu_to_be16(1); + + fan_table.HystSlope = cpu_to_be16(1); + + 
fan_table.TempRespLim = cpu_to_be16(5); + + reference_clock = smu7_get_xclk(hwmgr); + + fan_table.RefreshPeriod = cpu_to_be32((hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay * reference_clock) / 1600); + + fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); + + fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_MULT_THERMAL_CTRL, TEMP_SEL); + + /* fan_table.FanControl_GL_Flag = 1; */ + + res = smu7_copy_bytes_to_smc(hwmgr, smu7_data->fan_table_start, (uint8_t *)&fan_table, (uint32_t)sizeof(fan_table), SMC_RAM_END); + + return 0; +} + + +static int iceland_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) + return iceland_program_memory_timing_parameters(hwmgr); + + return 0; +} + +static int iceland_update_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + + int result = 0; + uint32_t low_sclk_interrupt_threshold = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification) + && (hwmgr->gfx_arbiter.sclk_threshold != + data->low_sclk_interrupt_threshold)) { + data->low_sclk_interrupt_threshold = + hwmgr->gfx_arbiter.sclk_threshold; + low_sclk_interrupt_threshold = + data->low_sclk_interrupt_threshold; + + CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU71_Discrete_DpmTable, + LowSclkInterruptThreshold), + (uint8_t *)&low_sclk_interrupt_threshold, + sizeof(uint32_t), + SMC_RAM_END); + } + + result = iceland_update_and_upload_mc_reg_table(hwmgr); + + PP_ASSERT_WITH_CODE((0 == result), "Failed to upload MC reg table!", return result); + + result = iceland_program_mem_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE((result == 0), + "Failed to program memory timing parameters!", + ); + + return result; +} + +static uint32_t iceland_get_offsetof(uint32_t type, uint32_t member) +{ + switch (type) { + case SMU_SoftRegisters: + switch (member) { + case HandshakeDisables: + return offsetof(SMU71_SoftRegisters, HandshakeDisables); + case VoltageChangeTimeout: + return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout); + case AverageGraphicsActivity: + return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity); + case PreVBlankGap: + return offsetof(SMU71_SoftRegisters, PreVBlankGap); + case VBlankTimeout: + return offsetof(SMU71_SoftRegisters, VBlankTimeout); + case UcodeLoadStatus: + return offsetof(SMU71_SoftRegisters, UcodeLoadStatus); + case DRAM_LOG_ADDR_H: + return offsetof(SMU71_SoftRegisters, DRAM_LOG_ADDR_H); + case DRAM_LOG_ADDR_L: + return offsetof(SMU71_SoftRegisters, DRAM_LOG_ADDR_L); + case DRAM_LOG_PHY_ADDR_H: + return offsetof(SMU71_SoftRegisters, DRAM_LOG_PHY_ADDR_H); + case DRAM_LOG_PHY_ADDR_L: + return offsetof(SMU71_SoftRegisters, DRAM_LOG_PHY_ADDR_L); + case DRAM_LOG_BUFF_SIZE: + return offsetof(SMU71_SoftRegisters, DRAM_LOG_BUFF_SIZE); + } + case SMU_Discrete_DpmTable: + switch (member) { + case LowSclkInterruptThreshold: + return offsetof(SMU71_Discrete_DpmTable, LowSclkInterruptThreshold); + } + } + pr_warn("can't get the offset of type %x member %x\n", type, member); + return 0; +} + +static uint32_t iceland_get_mac_definition(uint32_t 
value) +{ + switch (value) { + case SMU_MAX_LEVELS_GRAPHICS: + return SMU71_MAX_LEVELS_GRAPHICS; + case SMU_MAX_LEVELS_MEMORY: + return SMU71_MAX_LEVELS_MEMORY; + case SMU_MAX_LEVELS_LINK: + return SMU71_MAX_LEVELS_LINK; + case SMU_MAX_ENTRIES_SMIO: + return SMU71_MAX_ENTRIES_SMIO; + case SMU_MAX_LEVELS_VDDC: + return SMU71_MAX_LEVELS_VDDC; + case SMU_MAX_LEVELS_VDDCI: + return SMU71_MAX_LEVELS_VDDCI; + case SMU_MAX_LEVELS_MVDD: + return SMU71_MAX_LEVELS_MVDD; + } + + pr_warn("can't get the mac of %x\n", value); + return 0; +} + +static int iceland_process_firmware_header(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct smu7_smumgr *smu7_data = (struct smu7_smumgr *)(hwmgr->smu_backend); + + uint32_t tmp; + int result; + bool error = false; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, DpmTable), + &tmp, SMC_RAM_END); + + if (0 == result) { + smu7_data->dpm_table_start = tmp; + } + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, SoftRegisters), + &tmp, SMC_RAM_END); + + if (0 == result) { + data->soft_regs_start = tmp; + smu7_data->soft_regs_start = tmp; + } + + error |= (0 != result); + + + result = smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, mcRegisterTable), + &tmp, SMC_RAM_END); + + if (0 == result) { + smu7_data->mc_reg_table_start = tmp; + } + + result = smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, FanTable), + &tmp, SMC_RAM_END); + + if (0 == result) { + smu7_data->fan_table_start = tmp; + } + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, mcArbDramTimingTable), + &tmp, SMC_RAM_END); + + if (0 == result) { + smu7_data->arb_table_start = tmp; + } + + error |= (0 != result); + + + result = smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, Version), + &tmp, SMC_RAM_END); + + if (0 == result) { + hwmgr->microcode_version_info.SMC = tmp; + } + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, UlvSettings), + &tmp, SMC_RAM_END); + + if (0 == result) { + smu7_data->ulv_setting_starts = tmp; + } + + error |= (0 != result); + + return error ? 
1 : 0; +} + +/*---------------------------MC----------------------------*/ + +static uint8_t iceland_get_memory_modile_index(struct pp_hwmgr *hwmgr) +{ + return (uint8_t) (0xFF & (cgs_read_register(hwmgr->device, mmBIOS_SCRATCH_4) >> 16)); +} + +static bool iceland_check_s0_mc_reg_index(uint16_t in_reg, uint16_t *out_reg) +{ + bool result = true; + + switch (in_reg) { + case mmMC_SEQ_RAS_TIMING: + *out_reg = mmMC_SEQ_RAS_TIMING_LP; + break; + + case mmMC_SEQ_DLL_STBY: + *out_reg = mmMC_SEQ_DLL_STBY_LP; + break; + + case mmMC_SEQ_G5PDX_CMD0: + *out_reg = mmMC_SEQ_G5PDX_CMD0_LP; + break; + + case mmMC_SEQ_G5PDX_CMD1: + *out_reg = mmMC_SEQ_G5PDX_CMD1_LP; + break; + + case mmMC_SEQ_G5PDX_CTRL: + *out_reg = mmMC_SEQ_G5PDX_CTRL_LP; + break; + + case mmMC_SEQ_CAS_TIMING: + *out_reg = mmMC_SEQ_CAS_TIMING_LP; + break; + + case mmMC_SEQ_MISC_TIMING: + *out_reg = mmMC_SEQ_MISC_TIMING_LP; + break; + + case mmMC_SEQ_MISC_TIMING2: + *out_reg = mmMC_SEQ_MISC_TIMING2_LP; + break; + + case mmMC_SEQ_PMG_DVS_CMD: + *out_reg = mmMC_SEQ_PMG_DVS_CMD_LP; + break; + + case mmMC_SEQ_PMG_DVS_CTL: + *out_reg = mmMC_SEQ_PMG_DVS_CTL_LP; + break; + + case mmMC_SEQ_RD_CTL_D0: + *out_reg = mmMC_SEQ_RD_CTL_D0_LP; + break; + + case mmMC_SEQ_RD_CTL_D1: + *out_reg = mmMC_SEQ_RD_CTL_D1_LP; + break; + + case mmMC_SEQ_WR_CTL_D0: + *out_reg = mmMC_SEQ_WR_CTL_D0_LP; + break; + + case mmMC_SEQ_WR_CTL_D1: + *out_reg = mmMC_SEQ_WR_CTL_D1_LP; + break; + + case mmMC_PMG_CMD_EMRS: + *out_reg = mmMC_SEQ_PMG_CMD_EMRS_LP; + break; + + case mmMC_PMG_CMD_MRS: + *out_reg = mmMC_SEQ_PMG_CMD_MRS_LP; + break; + + case mmMC_PMG_CMD_MRS1: + *out_reg = mmMC_SEQ_PMG_CMD_MRS1_LP; + break; + + case mmMC_SEQ_PMG_TIMING: + *out_reg = mmMC_SEQ_PMG_TIMING_LP; + break; + + case mmMC_PMG_CMD_MRS2: + *out_reg = mmMC_SEQ_PMG_CMD_MRS2_LP; + break; + + case mmMC_SEQ_WR_CTL_2: + *out_reg = mmMC_SEQ_WR_CTL_2_LP; + break; + + default: + result = false; + break; + } + + return result; +} + +static int iceland_set_s0_mc_reg_index(struct iceland_mc_reg_table *table) +{ + uint32_t i; + uint16_t address; + + for (i = 0; i < table->last; i++) { + table->mc_reg_address[i].s0 = + iceland_check_s0_mc_reg_index(table->mc_reg_address[i].s1, &address) + ? 
address : table->mc_reg_address[i].s1; + } + return 0; +} + +static int iceland_copy_vbios_smc_reg_table(const pp_atomctrl_mc_reg_table *table, + struct iceland_mc_reg_table *ni_table) +{ + uint8_t i, j; + + PP_ASSERT_WITH_CODE((table->last <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + PP_ASSERT_WITH_CODE((table->num_entries <= MAX_AC_TIMING_ENTRIES), + "Invalid VramInfo table.", return -EINVAL); + + for (i = 0; i < table->last; i++) { + ni_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; + } + ni_table->last = table->last; + + for (i = 0; i < table->num_entries; i++) { + ni_table->mc_reg_table_entry[i].mclk_max = + table->mc_reg_table_entry[i].mclk_max; + for (j = 0; j < table->last; j++) { + ni_table->mc_reg_table_entry[i].mc_data[j] = + table->mc_reg_table_entry[i].mc_data[j]; + } + } + + ni_table->num_entries = table->num_entries; + + return 0; +} + +static int iceland_set_mc_special_registers(struct pp_hwmgr *hwmgr, + struct iceland_mc_reg_table *table) +{ + uint8_t i, j, k; + uint32_t temp_reg; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + for (i = 0, j = table->last; i < table->last; i++) { + PP_ASSERT_WITH_CODE((j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + + switch (table->mc_reg_address[i].s1) { + + case mmMC_SEQ_MISC1: + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_EMRS; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_EMRS_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + ((temp_reg & 0xffff0000)) | + ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); + } + j++; + PP_ASSERT_WITH_CODE((j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | + (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + + if (!data->is_memory_gddr5) { + table->mc_reg_table_entry[k].mc_data[j] |= 0x100; + } + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + + if (!data->is_memory_gddr5 && j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE) { + table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; + table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + } + + break; + + case mmMC_SEQ_RESERVE_M: + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS1; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS1_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | + (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + break; + + default: + break; + } + + } + + table->last = j; + + return 0; +} + +static int iceland_set_valid_flag(struct iceland_mc_reg_table *table) 
+{ + uint8_t i, j; + for (i = 0; i < table->last; i++) { + for (j = 1; j < table->num_entries; j++) { + if (table->mc_reg_table_entry[j-1].mc_data[i] != + table->mc_reg_table_entry[j].mc_data[i]) { + table->validflag |= (1<<i); + break; + } + } + } + + return 0; +} + +static int iceland_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct iceland_smumgr *smu_data = (struct iceland_smumgr *)(hwmgr->smu_backend); + pp_atomctrl_mc_reg_table *table; + struct iceland_mc_reg_table *ni_table = &smu_data->mc_reg_table; + uint8_t module_index = iceland_get_memory_modile_index(hwmgr); + + table = kzalloc(sizeof(pp_atomctrl_mc_reg_table), GFP_KERNEL); + + if (NULL == table) + return -ENOMEM; + + /* Program additional LP registers that are no longer programmed by VBIOS */ + cgs_write_register(hwmgr->device, mmMC_SEQ_RAS_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_CAS_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_CAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_DLL_STBY_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_DLL_STBY)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL)); + cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_EMRS_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS1_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS2_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS2)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2)); + + memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table)); + + result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table); + + if (0 == result) + result = iceland_copy_vbios_smc_reg_table(table, ni_table); + + if (0 == result) { + iceland_set_s0_mc_reg_index(ni_table); + result = iceland_set_mc_special_registers(hwmgr, ni_table); + } + + if (0 == result) + iceland_set_valid_flag(ni_table); + + kfree(table); + + return result; +} + 
+static bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr) +{ + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) + ? true : false; +} + const struct pp_smumgr_func iceland_smu_funcs = { .smu_init = &iceland_smu_init, .smu_fini = &smu7_smu_fini, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c deleted file mode 100644 index c92ea38d2e15..000000000000 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c +++ /dev/null @@ -1,2344 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "pp_debug.h" -#include "polaris10_smc.h" -#include "smu7_dyn_defaults.h" - -#include "smu7_hwmgr.h" -#include "hardwaremanager.h" -#include "ppatomctrl.h" -#include "cgs_common.h" -#include "atombios.h" -#include "polaris10_smumgr.h" -#include "pppcielanes.h" - -#include "smu_ucode_xfer_vi.h" -#include "smu74_discrete.h" -#include "smu/smu_7_1_3_d.h" -#include "smu/smu_7_1_3_sh_mask.h" -#include "gmc/gmc_8_1_d.h" -#include "gmc/gmc_8_1_sh_mask.h" -#include "oss/oss_3_0_d.h" -#include "gca/gfx_8_0_d.h" -#include "bif/bif_5_0_d.h" -#include "bif/bif_5_0_sh_mask.h" -#include "dce/dce_10_0_d.h" -#include "dce/dce_10_0_sh_mask.h" -#include "polaris10_pwrvirus.h" -#include "smu7_ppsmc.h" -#include "smu7_smumgr.h" - -#define POLARIS10_SMC_SIZE 0x20000 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 -#define POWERTUNE_DEFAULT_SET_MAX 1 -#define VDDC_VDDCI_DELTA 200 -#define MC_CG_ARB_FREQ_F1 0x0b - -static const struct polaris10_pt_defaults polaris10_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { - /* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, - * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */ - { 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, - { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61}, - { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } }, -}; - -static const sclkFcwRange_t Range_Table[NUM_SCLK_RANGE] = { - {VCO_2_4, POSTDIV_DIV_BY_16, 75, 160, 112}, - {VCO_3_6, POSTDIV_DIV_BY_16, 112, 224, 160}, - {VCO_2_4, POSTDIV_DIV_BY_8, 75, 160, 112}, - {VCO_3_6, POSTDIV_DIV_BY_8, 112, 224, 160}, - {VCO_2_4, POSTDIV_DIV_BY_4, 75, 160, 112}, - {VCO_3_6, POSTDIV_DIV_BY_4, 112, 216, 160}, - 
{VCO_2_4, POSTDIV_DIV_BY_2, 75, 160, 108}, - {VCO_3_6, POSTDIV_DIV_BY_2, 112, 216, 160} }; - -static int polaris10_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table, - uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd) -{ - uint32_t i; - uint16_t vddci; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - *voltage = *mvdd = 0; - - /* clock - voltage dependency table is empty table */ - if (dep_table->count == 0) - return -EINVAL; - - for (i = 0; i < dep_table->count; i++) { - /* find first sclk bigger than request */ - if (dep_table->entries[i].clk >= clock) { - *voltage |= (dep_table->entries[i].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) - *voltage |= (data->vbios_boot_state.vddci_bootup_value * - VOLTAGE_SCALE) << VDDCI_SHIFT; - else if (dep_table->entries[i].vddci) - *voltage |= (dep_table->entries[i].vddci * - VOLTAGE_SCALE) << VDDCI_SHIFT; - else { - vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), - (dep_table->entries[i].vddc - - (uint16_t)VDDC_VDDCI_DELTA)); - *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - } - - if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) - *mvdd = data->vbios_boot_state.mvdd_bootup_value * - VOLTAGE_SCALE; - else if (dep_table->entries[i].mvdd) - *mvdd = (uint32_t) dep_table->entries[i].mvdd * - VOLTAGE_SCALE; - - *voltage |= 1 << PHASES_SHIFT; - return 0; - } - } - - /* sclk is bigger than max sclk in the dependence table */ - *voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - - if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) - *voltage |= (data->vbios_boot_state.vddci_bootup_value * - VOLTAGE_SCALE) << VDDCI_SHIFT; - else if (dep_table->entries[i-1].vddci) { - vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), - (dep_table->entries[i].vddc - - (uint16_t)VDDC_VDDCI_DELTA)); - *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - } - - if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) - *mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE; - else if (dep_table->entries[i].mvdd) - *mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE; - - return 0; -} - -static uint16_t scale_fan_gain_settings(uint16_t raw_setting) -{ - uint32_t tmp; - tmp = raw_setting * 4096 / 100; - return (uint16_t)tmp; -} - -static int polaris10_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - - const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; - SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; - struct pp_advance_fan_control_parameters *fan_table = - &hwmgr->thermal_controller.advanceFanControlParameters; - int i, j, k; - const uint16_t *pdef1; - const uint16_t *pdef2; - - table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); - table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); - - PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, - "Target Operating Temp is out of Range!", - ); - - table->TemperatureLimitEdge = PP_HOST_TO_SMC_US( - cac_dtp_table->usTargetOperatingTemp * 256); - table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US( - cac_dtp_table->usTemperatureLimitHotspot * 256); - 
table->FanGainEdge = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainEdge)); - table->FanGainHotspot = PP_HOST_TO_SMC_US( - scale_fan_gain_settings(fan_table->usFanGainHotspot)); - - pdef1 = defaults->BAPMTI_R; - pdef2 = defaults->BAPMTI_RC; - - for (i = 0; i < SMU74_DTE_ITERATIONS; i++) { - for (j = 0; j < SMU74_DTE_SOURCES; j++) { - for (k = 0; k < SMU74_DTE_SINKS; k++) { - table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*pdef1); - table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*pdef2); - pdef1++; - pdef2++; - } - } - } - - return 0; -} - -static int polaris10_populate_svi_load_line(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; - - smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn; - smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC; - smu_data->power_tune_table.SviLoadLineTrimVddC = 3; - smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; - - return 0; -} - -static int polaris10_populate_tdc_limit(struct pp_hwmgr *hwmgr) -{ - uint16_t tdc_limit; - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; - - tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128); - smu_data->power_tune_table.TDC_VDDC_PkgLimit = - CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); - smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = - defaults->TDC_VDDC_ThrottleReleaseLimitPerc; - smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt; - - return 0; -} - -static int polaris10_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; - uint32_t temp; - - if (smu7_read_smc_sram_dword(hwmgr, - fuse_table_offset + - offsetof(SMU74_Discrete_PmFuses, TdcWaterfallCtl), - (uint32_t *)&temp, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", - return -EINVAL); - else { - smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl; - smu_data->power_tune_table.LPMLTemperatureMin = - (uint8_t)((temp >> 16) & 0xff); - smu_data->power_tune_table.LPMLTemperatureMax = - (uint8_t)((temp >> 8) & 0xff); - smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff); - } - return 0; -} - -static int polaris10_populate_temperature_scaler(struct pp_hwmgr *hwmgr) -{ - int i; - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - - /* Currently not used. Set all to zero. 
*/ - for (i = 0; i < 16; i++) - smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; - - return 0; -} - -static int polaris10_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - -/* TO DO move to hwmgr */ - if ((hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity & (1 << 15)) - || 0 == hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity) - hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity = - hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity; - - smu_data->power_tune_table.FuzzyFan_PwmSetDelta = PP_HOST_TO_SMC_US( - hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity); - return 0; -} - -static int polaris10_populate_gnb_lpml(struct pp_hwmgr *hwmgr) -{ - int i; - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - - /* Currently not used. Set all to zero. */ - for (i = 0; i < 16; i++) - smu_data->power_tune_table.GnbLPML[i] = 0; - - return 0; -} - -static int polaris10_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; - uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; - struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; - - hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); - lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); - - smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = - CONVERT_FROM_HOST_TO_SMC_US(hi_sidd); - smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = - CONVERT_FROM_HOST_TO_SMC_US(lo_sidd); - - return 0; -} - -static int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint32_t pm_fuse_table_offset; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment)) { - if (smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU74_Firmware_Header, PmFuseTable), - &pm_fuse_table_offset, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to get pm_fuse_table_offset Failed!", - return -EINVAL); - - if (polaris10_populate_svi_load_line(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate SviLoadLine Failed!", - return -EINVAL); - - if (polaris10_populate_tdc_limit(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TDCLimit Failed!", return -EINVAL); - - if (polaris10_populate_dw8(hwmgr, pm_fuse_table_offset)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TdcWaterfallCtl, " - "LPMLTemperature Min and Max Failed!", - return -EINVAL); - - if (0 != polaris10_populate_temperature_scaler(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate LPMLTemperatureScaler Failed!", - return -EINVAL); - - if (polaris10_populate_fuzzy_fan(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate Fuzzy Fan Control parameters Failed!", - return -EINVAL); - - if (polaris10_populate_gnb_lpml(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate GnbLPML Failed!", - return -EINVAL); - - if (polaris10_populate_bapm_vddc_base_leakage_sidd(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate BapmVddCBaseLeakage Hi 
and Lo " - "Sidd Failed!", return -EINVAL); - - if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, - (uint8_t *)&smu_data->power_tune_table, - (sizeof(struct SMU74_Discrete_PmFuses) - 92), SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to download PmFuseTable Failed!", - return -EINVAL); - } - return 0; -} - -/** - * Mvdd table preparation for SMC. - * - * @param *hwmgr The address of the hardware manager. - * @param *table The SMC DPM table structure to be populated. - * @return 0 - */ -static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, - SMU74_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t count, level; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { - count = data->mvdd_voltage_table.count; - if (count > SMU_MAX_SMIO_LEVELS) - count = SMU_MAX_SMIO_LEVELS; - for (level = 0; level < count; level++) { - table->SmioTable2.Pattern[level].Voltage = - PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); - /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ - table->SmioTable2.Pattern[level].Smio = - (uint8_t) level; - table->Smio[level] |= - data->mvdd_voltage_table.entries[level].smio_low; - } - table->SmioMask2 = data->mvdd_voltage_table.mask_low; - - table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count); - } - - return 0; -} - -static int polaris10_populate_smc_vddci_table(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - uint32_t count, level; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - count = data->vddci_voltage_table.count; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { - if (count > SMU_MAX_SMIO_LEVELS) - count = SMU_MAX_SMIO_LEVELS; - for (level = 0; level < count; ++level) { - table->SmioTable1.Pattern[level].Voltage = - PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[level].value * VOLTAGE_SCALE); - table->SmioTable1.Pattern[level].Smio = (uint8_t) level; - - table->Smio[level] |= data->vddci_voltage_table.entries[level].smio_low; - } - } - - table->SmioMask1 = data->vddci_voltage_table.mask_low; - - return 0; -} - -/** -* Preparation of vddc and vddgfx CAC tables for SMC. -* -* @param hwmgr the address of the hardware manager -* @param table the SMC DPM table structure to be populated -* @return always 0 -*/ -static int polaris10_populate_cac_table(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - uint32_t count; - uint8_t index; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_voltage_lookup_table *lookup_table = - table_info->vddc_lookup_table; - /* tables is already swapped, so in order to use the value from it, - * we need to swap it back. - * We are populating vddc CAC data to BapmVddc table - * in split and merged mode - */ - for (count = 0; count < lookup_table->count; count++) { - index = phm_get_voltage_index(lookup_table, - data->vddc_voltage_table.entries[count].value); - table->BapmVddcVidLoSidd[count] = convert_to_vid(lookup_table->entries[index].us_cac_low); - table->BapmVddcVidHiSidd[count] = convert_to_vid(lookup_table->entries[index].us_cac_mid); - table->BapmVddcVidHiSidd2[count] = convert_to_vid(lookup_table->entries[index].us_cac_high); - } - - return 0; -} - -/** -* Preparation of voltage tables for SMC. 
-* -* @param hwmgr the address of the hardware manager -* @param table the SMC DPM table structure to be populated -* @return always 0 -*/ - -static int polaris10_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - polaris10_populate_smc_vddci_table(hwmgr, table); - polaris10_populate_smc_mvdd_table(hwmgr, table); - polaris10_populate_cac_table(hwmgr, table); - - return 0; -} - -static int polaris10_populate_ulv_level(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_Ulv *state) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - state->CcPwrDynRm = 0; - state->CcPwrDynRm1 = 0; - - state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; - state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * - VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); - - if (hwmgr->chip_id == CHIP_POLARIS12 || hwmgr->is_kicker) - state->VddcPhase = data->vddc_phase_shed_control ^ 0x3; - else - state->VddcPhase = (data->vddc_phase_shed_control) ? 0 : 1; - - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); - CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); - - return 0; -} - -static int polaris10_populate_ulv_state(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - return polaris10_populate_ulv_level(hwmgr, &table->Ulv); -} - -static int polaris10_populate_smc_link_level(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - int i; - - /* Index (dpm_table->pcie_speed_table.count) - * is reserved for PCIE boot level. 
*/ - for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { - table->LinkLevel[i].PcieGenSpeed = - (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; - table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width( - dpm_table->pcie_speed_table.dpm_levels[i].param1); - table->LinkLevel[i].EnabledForActivity = 1; - table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff); - table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5); - table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30); - } - - smu_data->smc_state_table.LinkLevelCount = - (uint8_t)dpm_table->pcie_speed_table.count; - -/* To Do move to hwmgr */ - data->dpm_level_enable_mask.pcie_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); - - return 0; -} - - -static void polaris10_get_sclk_range_table(struct pp_hwmgr *hwmgr, - SMU74_Discrete_DpmTable *table) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint32_t i, ref_clk; - - struct pp_atom_ctrl_sclk_range_table range_table_from_vbios = { { {0} } }; - - ref_clk = smu7_get_xclk(hwmgr); - - if (0 == atomctrl_get_smc_sclk_range_table(hwmgr, &range_table_from_vbios)) { - for (i = 0; i < NUM_SCLK_RANGE; i++) { - table->SclkFcwRangeTable[i].vco_setting = range_table_from_vbios.entry[i].ucVco_setting; - table->SclkFcwRangeTable[i].postdiv = range_table_from_vbios.entry[i].ucPostdiv; - table->SclkFcwRangeTable[i].fcw_pcc = range_table_from_vbios.entry[i].usFcw_pcc; - - table->SclkFcwRangeTable[i].fcw_trans_upper = range_table_from_vbios.entry[i].usFcw_trans_upper; - table->SclkFcwRangeTable[i].fcw_trans_lower = range_table_from_vbios.entry[i].usRcw_trans_lower; - - CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); - CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); - CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); - } - return; - } - - for (i = 0; i < NUM_SCLK_RANGE; i++) { - smu_data->range_table[i].trans_lower_frequency = (ref_clk * Range_Table[i].fcw_trans_lower) >> Range_Table[i].postdiv; - smu_data->range_table[i].trans_upper_frequency = (ref_clk * Range_Table[i].fcw_trans_upper) >> Range_Table[i].postdiv; - - table->SclkFcwRangeTable[i].vco_setting = Range_Table[i].vco_setting; - table->SclkFcwRangeTable[i].postdiv = Range_Table[i].postdiv; - table->SclkFcwRangeTable[i].fcw_pcc = Range_Table[i].fcw_pcc; - - table->SclkFcwRangeTable[i].fcw_trans_upper = Range_Table[i].fcw_trans_upper; - table->SclkFcwRangeTable[i].fcw_trans_lower = Range_Table[i].fcw_trans_lower; - - CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); - CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); - CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); - } -} - -/** -* Calculates the SCLK dividers using the provided engine clock -* -* @param hwmgr the address of the hardware manager -* @param clock the engine clock to use to populate the structure -* @param sclk the SMC SCLK structure to be populated -*/ -static int polaris10_calculate_sclk_params(struct pp_hwmgr *hwmgr, - uint32_t clock, SMU_SclkSetting *sclk_setting) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - const SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); - struct pp_atomctrl_clock_dividers_ai dividers; - uint32_t ref_clock; - uint32_t pcc_target_percent, pcc_target_freq, ss_target_percent, ss_target_freq; - uint8_t i; - int result; - uint64_t temp; - - 
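For readers skimming the removed code, the fallback branch of this function derives the PLL feedback-control word (FCW) in fixed point when the VBIOS divider query fails: the integer FCW is (clock << postdiv) / ref_clk and the fractional FCW is the low 16 bits of ((clock << postdiv) << 16) / ref_clk. A minimal standalone sketch of just that arithmetic follows; the helper name sketch_fcw_split and its bare-parameter form are illustrative only (the driver works on SMU_SclkSetting fields and uses do_div), and clock/ref_clk are simply assumed to be in the same unit with ref_clk nonzero.

#include <stdint.h>

/* Illustrative sketch: split vco = (clock << postdiv) into an integer FCW and a
 * 16-bit fractional FCW relative to the reference clock, mirroring the
 * Fcw_int/Fcw_frac computation in the removed code below.
 */
static void sketch_fcw_split(uint32_t clock, uint32_t postdiv, uint32_t ref_clk,
                             uint16_t *fcw_int, uint16_t *fcw_frac)
{
	uint64_t vco = (uint64_t)clock << postdiv;   /* widen before shifting to avoid overflow */

	*fcw_int  = (uint16_t)(vco / ref_clk);                    /* whole multiples of ref_clk */
	*fcw_frac = (uint16_t)(((vco << 16) / ref_clk) & 0xffff); /* low 16 bits = fraction */
}

As a purely numerical check, clock = 120000 with postdiv = 0 and ref_clk = 2500 gives fcw_int = 48 and fcw_frac = 0, i.e. exactly 48 reference-clock multiples with no fractional remainder.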
sclk_setting->SclkFrequency = clock; - /* get the engine clock dividers for this clock value */ - result = atomctrl_get_engine_pll_dividers_ai(hwmgr, clock, &dividers); - if (result == 0) { - sclk_setting->Fcw_int = dividers.usSclk_fcw_int; - sclk_setting->Fcw_frac = dividers.usSclk_fcw_frac; - sclk_setting->Pcc_fcw_int = dividers.usPcc_fcw_int; - sclk_setting->PllRange = dividers.ucSclkPllRange; - sclk_setting->Sclk_slew_rate = 0x400; - sclk_setting->Pcc_up_slew_rate = dividers.usPcc_fcw_slew_frac; - sclk_setting->Pcc_down_slew_rate = 0xffff; - sclk_setting->SSc_En = dividers.ucSscEnable; - sclk_setting->Fcw1_int = dividers.usSsc_fcw1_int; - sclk_setting->Fcw1_frac = dividers.usSsc_fcw1_frac; - sclk_setting->Sclk_ss_slew_rate = dividers.usSsc_fcw_slew_frac; - return result; - } - - ref_clock = smu7_get_xclk(hwmgr); - - for (i = 0; i < NUM_SCLK_RANGE; i++) { - if (clock > smu_data->range_table[i].trans_lower_frequency - && clock <= smu_data->range_table[i].trans_upper_frequency) { - sclk_setting->PllRange = i; - break; - } - } - - sclk_setting->Fcw_int = (uint16_t)((clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / ref_clock); - temp = clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; - temp <<= 0x10; - do_div(temp, ref_clock); - sclk_setting->Fcw_frac = temp & 0xffff; - - pcc_target_percent = 10; /* Hardcode 10% for now. */ - pcc_target_freq = clock - (clock * pcc_target_percent / 100); - sclk_setting->Pcc_fcw_int = (uint16_t)((pcc_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / ref_clock); - - ss_target_percent = 2; /* Hardcode 2% for now. */ - sclk_setting->SSc_En = 0; - if (ss_target_percent) { - sclk_setting->SSc_En = 1; - ss_target_freq = clock - (clock * ss_target_percent / 100); - sclk_setting->Fcw1_int = (uint16_t)((ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / ref_clock); - temp = ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; - temp <<= 0x10; - do_div(temp, ref_clock); - sclk_setting->Fcw1_frac = temp & 0xffff; - } - - return 0; -} - -/** -* Populates single SMC SCLK structure using the provided engine clock -* -* @param hwmgr the address of the hardware manager -* @param clock the engine clock to use to populate the structure -* @param sclk the SMC SCLK structure to be populated -*/ - -static int polaris10_populate_single_graphic_level(struct pp_hwmgr *hwmgr, - uint32_t clock, uint16_t sclk_al_threshold, - struct SMU74_Discrete_GraphicsLevel *level) -{ - int result; - /* PP_Clocks minClocks; */ - uint32_t mvdd; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - SMU_SclkSetting curr_sclk_setting = { 0 }; - - result = polaris10_calculate_sclk_params(hwmgr, clock, &curr_sclk_setting); - - /* populate graphics levels */ - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, clock, - &level->MinVoltage, &mvdd); - - PP_ASSERT_WITH_CODE((0 == result), - "can not find VDDC voltage value for " - "VDDC engine clock dependency table", - return result); - level->ActivityLevel = sclk_al_threshold; - - level->CcPwrDynRm = 0; - level->CcPwrDynRm1 = 0; - level->EnabledForActivity = 0; - level->EnabledForThrottle = 1; - level->UpHyst = 10; - level->DownHyst = 0; - level->VoltageDownHyst = 0; - level->PowerThrottle = 0; - data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr; - - if 
(phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) - level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock, - hwmgr->display_config.min_core_set_clock_in_sr); - - /* Default to slow, highest DPM level will be - * set to PPSMC_DISPLAY_WATERMARK_LOW later. - */ - if (data->update_up_hyst) - level->UpHyst = (uint8_t)data->up_hyst; - if (data->update_down_hyst) - level->DownHyst = (uint8_t)data->down_hyst; - - level->SclkSetting = curr_sclk_setting; - - CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage); - CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1); - CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(level->SclkSetting.SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_int); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_frac); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_fcw_int); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_slew_rate); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_up_slew_rate); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_down_slew_rate); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_int); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_frac); - CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_ss_slew_rate); - return 0; -} - -/** -* Populates all SMC SCLK levels' structure based on the trimmed allowed dpm engine clock states -* -* @param hwmgr the address of the hardware manager -*/ -int polaris10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; - uint8_t pcie_entry_cnt = (uint8_t) hw_data->dpm_table.pcie_speed_table.count; - int result = 0; - uint32_t array = smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, GraphicsLevel); - uint32_t array_size = sizeof(struct SMU74_Discrete_GraphicsLevel) * - SMU74_MAX_LEVELS_GRAPHICS; - struct SMU74_Discrete_GraphicsLevel *levels = - smu_data->smc_state_table.GraphicsLevel; - uint32_t i, max_entry; - uint8_t hightest_pcie_level_enabled = 0, - lowest_pcie_level_enabled = 0, - mid_pcie_level_enabled = 0, - count = 0; - - polaris10_get_sclk_range_table(hwmgr, &(smu_data->smc_state_table)); - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - - result = polaris10_populate_single_graphic_level(hwmgr, - dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], - &(smu_data->smc_state_table.GraphicsLevel[i])); - if (result) - return result; - - /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. 
*/ - if (i > 1) - levels[i].DeepSleepDivId = 0; - } - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SPLLShutdownSupport)) - smu_data->smc_state_table.GraphicsLevel[0].SclkSetting.SSc_En = 0; - - smu_data->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1; - smu_data->smc_state_table.GraphicsDpmLevelCount = - (uint8_t)dpm_table->sclk_table.count; - hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); - - - if (pcie_table != NULL) { - PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt), - "There must be 1 or more PCIE levels defined in PPTable.", - return -EINVAL); - max_entry = pcie_entry_cnt - 1; - for (i = 0; i < dpm_table->sclk_table.count; i++) - levels[i].pcieDpmLevel = - (uint8_t) ((i < max_entry) ? i : max_entry); - } else { - while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && - ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << (hightest_pcie_level_enabled + 1))) != 0)) - hightest_pcie_level_enabled++; - - while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && - ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << lowest_pcie_level_enabled)) == 0)) - lowest_pcie_level_enabled++; - - while ((count < hightest_pcie_level_enabled) && - ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) - count++; - - mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) < - hightest_pcie_level_enabled ? - (lowest_pcie_level_enabled + 1 + count) : - hightest_pcie_level_enabled; - - /* set pcieDpmLevel to hightest_pcie_level_enabled */ - for (i = 2; i < dpm_table->sclk_table.count; i++) - levels[i].pcieDpmLevel = hightest_pcie_level_enabled; - - /* set pcieDpmLevel to lowest_pcie_level_enabled */ - levels[0].pcieDpmLevel = lowest_pcie_level_enabled; - - /* set pcieDpmLevel to mid_pcie_level_enabled */ - levels[1].pcieDpmLevel = mid_pcie_level_enabled; - } - /* level count will send to smc once at init smc table and never change */ - result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, - (uint32_t)array_size, SMC_RAM_END); - - return result; -} - - -static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, - uint32_t clock, struct SMU74_Discrete_MemoryLevel *mem_level) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - int result = 0; - struct cgs_display_info info = {0, 0, NULL}; - uint32_t mclk_stutter_mode_threshold = 40000; - - cgs_get_active_displays_info(hwmgr->device, &info); - - if (table_info->vdd_dep_on_mclk) { - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, clock, - &mem_level->MinVoltage, &mem_level->MinMvdd); - PP_ASSERT_WITH_CODE((0 == result), - "can not find MinVddc voltage value from memory " - "VDDC voltage dependency table", return result); - } - - mem_level->MclkFrequency = clock; - mem_level->EnabledForThrottle = 1; - mem_level->EnabledForActivity = 0; - mem_level->UpHyst = 0; - mem_level->DownHyst = 100; - mem_level->VoltageDownHyst = 0; - mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; - mem_level->StutterEnable = false; - mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - data->display_timing.num_existing_displays = info.display_count; - - if (mclk_stutter_mode_threshold && - (clock <= mclk_stutter_mode_threshold) && - (PHM_READ_FIELD(hwmgr->device, 
DPG_PIPE_STUTTER_CONTROL, - STUTTER_ENABLE) & 0x1)) - mem_level->StutterEnable = true; - - if (!result) { - CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd); - CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency); - CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage); - } - return result; -} - -/** -* Populates all SMC MCLK levels' structure based on the trimmed allowed dpm memory clock states -* -* @param hwmgr the address of the hardware manager -*/ -int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; - int result; - /* populate MCLK dpm table to SMU7 */ - uint32_t array = smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, MemoryLevel); - uint32_t array_size = sizeof(SMU74_Discrete_MemoryLevel) * - SMU74_MAX_LEVELS_MEMORY; - struct SMU74_Discrete_MemoryLevel *levels = - smu_data->smc_state_table.MemoryLevel; - uint32_t i; - - for (i = 0; i < dpm_table->mclk_table.count; i++) { - PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), - "can not populate memory level as memory clock is zero", - return -EINVAL); - result = polaris10_populate_single_memory_level(hwmgr, - dpm_table->mclk_table.dpm_levels[i].value, - &levels[i]); - if (i == dpm_table->mclk_table.count - 1) { - levels[i].DisplayWatermark = PPSMC_DISPLAY_WATERMARK_HIGH; - levels[i].EnabledForActivity = 1; - } - if (result) - return result; - } - - /* In order to prevent MC activity from stutter mode to push DPM up, - * the UVD change complements this by putting the MCLK in - * a higher state by default such that we are not affected by - * up threshold or and MCLK DPM latency. - */ - levels[0].ActivityLevel = 0x1f; - CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel); - - smu_data->smc_state_table.MemoryDpmLevelCount = - (uint8_t)dpm_table->mclk_table.count; - hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); - - /* level count will send to smc once at init smc table and never change */ - result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, - (uint32_t)array_size, SMC_RAM_END); - - return result; -} - -/** -* Populates the SMC MVDD structure using the provided memory clock. -* -* @param hwmgr the address of the hardware manager -* @param mclk the MCLK value to be used in the decision if MVDD should be high or low. 
-* @param voltage the SMC VOLTAGE structure to be populated -*/ -static int polaris10_populate_mvdd_value(struct pp_hwmgr *hwmgr, - uint32_t mclk, SMIO_Pattern *smio_pat) -{ - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint32_t i = 0; - - if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { - /* find mvdd value which clock is more than request */ - for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { - if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { - smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value; - break; - } - } - PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, - "MVDD Voltage is outside the supported range.", - return -EINVAL); - } else - return -EINVAL; - - return 0; -} - -static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, - SMU74_Discrete_DpmTable *table) -{ - int result = 0; - uint32_t sclk_frequency; - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - SMIO_Pattern vol_level; - uint32_t mvdd; - uint16_t us_mvdd; - - table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - - /* Get MinVoltage and Frequency from DPM0, - * already converted to SMC_UL */ - sclk_frequency = data->vbios_boot_state.sclk_bootup_value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, - sclk_frequency, - &table->ACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDC voltage value " - "in Clock Dependency Table", - ); - - result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency, &(table->ACPILevel.SclkSetting)); - PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result); - - table->ACPILevel.DeepSleepDivId = 0; - table->ACPILevel.CcPwrDynRm = 0; - table->ACPILevel.CcPwrDynRm1 = 0; - - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); - - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkSetting.SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_int); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_frac); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_fcw_int); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_slew_rate); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_up_slew_rate); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_down_slew_rate); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_int); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac); - CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate); - - - /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ - table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, - table->MemoryACPILevel.MclkFrequency, - &table->MemoryACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDCI voltage value " - "in Clock Dependency Table", - ); - - us_mvdd = 0; - if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) || - 
(data->mclk_dpm_key_disabled)) - us_mvdd = data->vbios_boot_state.mvdd_bootup_value; - else { - if (!polaris10_populate_mvdd_value(hwmgr, - data->dpm_table.mclk_table.dpm_levels[0].value, - &vol_level)) - us_mvdd = vol_level.Voltage; - } - - if (0 == polaris10_populate_mvdd_value(hwmgr, 0, &vol_level)) - table->MemoryACPILevel.MinMvdd = PP_HOST_TO_SMC_UL(vol_level.Voltage); - else - table->MemoryACPILevel.MinMvdd = 0; - - table->MemoryACPILevel.StutterEnable = false; - - table->MemoryACPILevel.EnabledForThrottle = 0; - table->MemoryACPILevel.EnabledForActivity = 0; - table->MemoryACPILevel.UpHyst = 0; - table->MemoryACPILevel.DownHyst = 100; - table->MemoryACPILevel.VoltageDownHyst = 0; - table->MemoryACPILevel.ActivityLevel = - PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); - - CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage); - - return result; -} - -static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, - SMU74_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t vddci; - - table->VceLevelCount = (uint8_t)(mm_table->count); - table->VceBootLevel = 0; - - for (count = 0; count < table->VceLevelCount; count++) { - table->VceLevel[count].Frequency = mm_table->entries[count].eclk; - table->VceLevel[count].MinVoltage = 0; - table->VceLevel[count].MinVoltage |= - (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) - vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) - vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; - else - vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; - - - table->VceLevel[count].MinVoltage |= - (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /*retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->VceLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for VCE engine clock", - return result); - - table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage); - } - return result; -} - - -static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU74_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t vddci; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t)(mm_table->count); - - for (count = 0; count < table->SamuLevelCount; count++) { - /* not sure whether we need evclk or not */ - table->SamuLevel[count].MinVoltage = 0; - table->SamuLevel[count].Frequency = 
mm_table->entries[count].samclock; - table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) - vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) - vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; - else - vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; - - table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); - } - return result; -} - -static int polaris10_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, - int32_t eng_clock, int32_t mem_clock, - SMU74_Discrete_MCArbDramTimingTableEntry *arb_regs) -{ - uint32_t dram_timing; - uint32_t dram_timing2; - uint32_t burst_time; - int result; - - result = atomctrl_set_engine_dram_timings_rv770(hwmgr, - eng_clock, mem_clock); - PP_ASSERT_WITH_CODE(result == 0, - "Error calling VBIOS to set DRAM_TIMING.", return result); - - dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); - dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); - burst_time = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); - - - arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dram_timing); - arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2); - arb_regs->McArbBurstTime = (uint8_t)burst_time; - - return 0; -} - -static int polaris10_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct SMU74_Discrete_MCArbDramTimingTable arb_regs; - uint32_t i, j; - int result = 0; - - for (i = 0; i < hw_data->dpm_table.sclk_table.count; i++) { - for (j = 0; j < hw_data->dpm_table.mclk_table.count; j++) { - result = polaris10_populate_memory_timing_parameters(hwmgr, - hw_data->dpm_table.sclk_table.dpm_levels[i].value, - hw_data->dpm_table.mclk_table.dpm_levels[j].value, - &arb_regs.entries[i][j]); - if (result == 0) - result = atomctrl_set_ac_timing_ai(hwmgr, hw_data->dpm_table.mclk_table.dpm_levels[j].value, j); - if (result != 0) - return result; - } - } - - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.arb_table_start, - (uint8_t *)&arb_regs, - sizeof(SMU74_Discrete_MCArbDramTimingTable), - SMC_RAM_END); - return result; -} - -static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t vddci; - - table->UvdLevelCount = 
(uint8_t)(mm_table->count); - table->UvdBootLevel = 0; - - for (count = 0; count < table->UvdLevelCount; count++) { - table->UvdLevel[count].MinVoltage = 0; - table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk; - table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; - table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) - vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) - vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; - else - vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; - - table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->UvdLevel[count].VclkFrequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for Vclk clock", return result); - - table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider; - - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->UvdLevel[count].DclkFrequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for Dclk clock", return result); - - table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); - } - - return result; -} - -static int polaris10_populate_smc_boot_level(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - int result = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - table->GraphicsBootLevel = 0; - table->MemoryBootLevel = 0; - - /* find boot level from dpm table */ - result = phm_find_boot_level(&(data->dpm_table.sclk_table), - data->vbios_boot_state.sclk_bootup_value, - (uint32_t *)&(table->GraphicsBootLevel)); - - result = phm_find_boot_level(&(data->dpm_table.mclk_table), - data->vbios_boot_state.mclk_bootup_value, - (uint32_t *)&(table->MemoryBootLevel)); - - table->BootVddc = data->vbios_boot_state.vddc_bootup_value * - VOLTAGE_SCALE; - table->BootVddci = data->vbios_boot_state.vddci_bootup_value * - VOLTAGE_SCALE; - table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value * - VOLTAGE_SCALE; - - CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc); - CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci); - CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); - - return 0; -} - -static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint8_t count, level; - - count = (uint8_t)(table_info->vdd_dep_on_sclk->count); - - for (level = 0; level < count; level++) { - if (table_info->vdd_dep_on_sclk->entries[level].clk >= - hw_data->vbios_boot_state.sclk_bootup_value) { - smu_data->smc_state_table.GraphicsBootLevel = level; - break; - } - } - - count = (uint8_t)(table_info->vdd_dep_on_mclk->count); - for (level = 0; level < count; level++) { - if 
(table_info->vdd_dep_on_mclk->entries[level].clk >= - hw_data->vbios_boot_state.mclk_bootup_value) { - smu_data->smc_state_table.MemoryBootLevel = level; - break; - } - } - - return 0; -} - - -static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) -{ - uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min; - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - - uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = - table_info->vdd_dep_on_sclk; - - stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; - - /* Read SMU_Eefuse to read and calculate RO and determine - * if the part is SS or FF. if RO >= 1660MHz, part is FF. - */ - efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (67 * 4)); - efuse &= 0xFF000000; - efuse = efuse >> 24; - - if (hwmgr->chip_id == CHIP_POLARIS10) { - min = 1000; - max = 2300; - } else { - min = 1100; - max = 2100; - } - - ro = efuse * (max - min) / 255 + min; - - /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ - for (i = 0; i < sclk_table->count; i++) { - smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= - sclk_table->entries[i].cks_enable << i; - if (hwmgr->chip_id == CHIP_POLARIS10) { - volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * 136418 - (ro - 70) * 1000000) / \ - (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); - volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * 3232 - (ro - 65) * 1000000) / \ - (2522480 - sclk_table->entries[i].clk/100 * 115764/100)); - } else { - volt_without_cks = (uint32_t)((2416794800U + (sclk_table->entries[i].clk/100) * 1476925/10 - (ro - 50) * 1000000) / \ - (2625416 - (sclk_table->entries[i].clk/100) * (12586807/10000))); - volt_with_cks = (uint32_t)((2999656000U - sclk_table->entries[i].clk/100 * 392803 - (ro - 44) * 1000000) / \ - (3422454 - sclk_table->entries[i].clk/100 * (18886376/10000))); - } - - if (volt_without_cks >= volt_with_cks) - volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + - sclk_table->entries[i].cks_voffset) * 100 + 624) / 625); - - smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; - } - - smu_data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6; - /* Populate CKS Lookup Table */ - if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) - stretch_amount2 = 0; - else if (stretch_amount == 3 || stretch_amount == 4) - stretch_amount2 = 1; - else { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ClockStretcher); - PP_ASSERT_WITH_CODE(false, - "Stretch Amount in PPTable not supported\n", - return -EINVAL); - } - - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); - value &= 0xFFFFFFFE; - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); - - return 0; -} - -/** -* Populates the SMC VRConfig field in DPM table. 
-* -* @param hwmgr the address of the hardware manager -* @param table the SMC DPM table structure to be populated -* @return always 0 -*/ -static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr, - struct SMU74_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint16_t config; - - config = VR_MERGED_WITH_VDDC; - table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT); - - /* Set Vddc Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { - config = VR_SVI2_PLANE_1; - table->VRConfig |= config; - } else { - PP_ASSERT_WITH_CODE(false, - "VDDC should be on SVI2 control in merged mode!", - ); - } - /* Set Vddci Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { - config = VR_SVI2_PLANE_2; /* only in merged mode */ - table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); - } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { - config = VR_SMIO_PATTERN_1; - table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); - } else { - config = VR_STATIC_VOLTAGE; - table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); - } - /* Set Mvdd Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) { - config = VR_SVI2_PLANE_2; - table->VRConfig |= (config << VRCONF_MVDD_SHIFT); - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, smu_data->smu7_data.soft_regs_start + - offsetof(SMU74_SoftRegisters, AllowMvddSwitch), 0x1); - } else { - config = VR_STATIC_VOLTAGE; - table->VRConfig |= (config << VRCONF_MVDD_SHIFT); - } - - return 0; -} - - -static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - - SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); - int result = 0; - struct pp_atom_ctrl__avfs_parameters avfs_params = {0}; - AVFS_meanNsigma_t AVFS_meanNsigma = { {0} }; - AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} }; - uint32_t tmp, i; - - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)hwmgr->pptable; - struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = - table_info->vdd_dep_on_sclk; - - - if (((struct smu7_smumgr *)smu_data)->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) - return result; - - result = atomctrl_get_avfs_information(hwmgr, &avfs_params); - - if (0 == result) { - table->BTCGB_VDROOP_TABLE[0].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); - table->BTCGB_VDROOP_TABLE[0].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); - table->BTCGB_VDROOP_TABLE[0].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2); - table->BTCGB_VDROOP_TABLE[1].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0); - table->BTCGB_VDROOP_TABLE[1].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1); - table->BTCGB_VDROOP_TABLE[1].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2); - table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1); - table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2); - table->AVFSGB_VDROOP_TABLE[0].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b); - table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24; - table->AVFSGB_VDROOP_TABLE[0].m2_shift = 12; - table->AVFSGB_VDROOP_TABLE[1].m1 = 
PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1); - table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2); - table->AVFSGB_VDROOP_TABLE[1].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b); - table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24; - table->AVFSGB_VDROOP_TABLE[1].m2_shift = 12; - table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv); - AVFS_meanNsigma.Aconstant[0] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0); - AVFS_meanNsigma.Aconstant[1] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1); - AVFS_meanNsigma.Aconstant[2] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2); - AVFS_meanNsigma.DC_tol_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma); - AVFS_meanNsigma.Platform_mean = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean); - AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor); - AVFS_meanNsigma.Platform_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma); - - for (i = 0; i < NUM_VFT_COLUMNS; i++) { - AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625); - AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100); - } - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma), - &tmp, SMC_RAM_END); - - smu7_copy_bytes_to_smc(hwmgr, - tmp, - (uint8_t *)&AVFS_meanNsigma, - sizeof(AVFS_meanNsigma_t), - SMC_RAM_END); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable), - &tmp, SMC_RAM_END); - smu7_copy_bytes_to_smc(hwmgr, - tmp, - (uint8_t *)&AVFS_SclkOffset, - sizeof(AVFS_Sclk_Offset_t), - SMC_RAM_END); - - data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) | - (avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) | - (avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) | - (avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT); - data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false; - } - return result; -} - - -/** -* Initialize the ARB DRAM timing table's index field. -* -* @param hwmgr the address of the powerplay hardware manager. -* @return always 0 -*/ -static int polaris10_init_arb_table_index(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint32_t tmp; - int result; - - /* This is a read-modify-write on the first byte of the ARB table. - * The first byte in the SMU73_Discrete_MCArbDramTimingTable structure - * is the field 'current'. - * This solution is ugly, but we never write the whole table only - * individual fields in it. - * In reality this field should not be in that structure - * but in a soft register. 
- */ - result = smu7_read_smc_sram_dword(hwmgr, - smu_data->smu7_data.arb_table_start, &tmp, SMC_RAM_END); - - if (result) - return result; - - tmp &= 0x00FFFFFF; - tmp |= ((uint32_t)MC_CG_ARB_FREQ_F1) << 24; - - return smu7_write_smc_sram_dword(hwmgr, - smu_data->smu7_data.arb_table_start, tmp, SMC_RAM_END); -} - -static void polaris10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - if (table_info && - table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX && - table_info->cac_dtp_table->usPowerTuneDataSetID) - smu_data->power_tune_defaults = - &polaris10_power_tune_data_set_array - [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; - else - smu_data->power_tune_defaults = &polaris10_power_tune_data_set_array[0]; - -} - -static void polaris10_save_default_power_profile(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct SMU74_Discrete_GraphicsLevel *levels = - data->smc_state_table.GraphicsLevel; - unsigned min_level = 1; - - hwmgr->default_gfx_power_profile.activity_threshold = - be16_to_cpu(levels[0].ActivityLevel); - hwmgr->default_gfx_power_profile.up_hyst = levels[0].UpHyst; - hwmgr->default_gfx_power_profile.down_hyst = levels[0].DownHyst; - hwmgr->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; - - hwmgr->default_compute_power_profile = hwmgr->default_gfx_power_profile; - hwmgr->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; - - /* Workaround compute SDMA instability: disable lowest SCLK - * DPM level. Optimize compute power profile: Use only highest - * 2 power levels (if more than 2 are available), Hysteresis: - * 0ms up, 5ms down - */ - if (data->smc_state_table.GraphicsDpmLevelCount > 2) - min_level = data->smc_state_table.GraphicsDpmLevelCount - 2; - else if (data->smc_state_table.GraphicsDpmLevelCount == 2) - min_level = 1; - else - min_level = 0; - hwmgr->default_compute_power_profile.min_sclk = - be32_to_cpu(levels[min_level].SclkSetting.SclkFrequency); - hwmgr->default_compute_power_profile.up_hyst = 0; - hwmgr->default_compute_power_profile.down_hyst = 5; - - hwmgr->gfx_power_profile = hwmgr->default_gfx_power_profile; - hwmgr->compute_power_profile = hwmgr->default_compute_power_profile; -} - -/** -* Initializes the SMC table and uploads it -* -* @param hwmgr the address of the powerplay hardware manager. 
-* @return always 0 -*/ -int polaris10_init_smc_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); - uint8_t i; - struct pp_atomctrl_gpio_pin_assignment gpio_pin; - pp_atomctrl_clock_dividers_vi dividers; - - polaris10_initialize_power_tune_defaults(hwmgr); - - if (SMU7_VOLTAGE_CONTROL_NONE != hw_data->voltage_control) - polaris10_populate_smc_voltage_tables(hwmgr, table); - - table->SystemFlags = 0; - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StepVddc)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - - if (hw_data->is_memory_gddr5) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; - - if (hw_data->ulv_supported && table_info->us_ulv_voltage_offset) { - result = polaris10_populate_ulv_state(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ULV state!", return result); - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixCG_ULV_PARAMETER, SMU7_CGULVPARAMETER_DFLT); - } - - result = polaris10_populate_smc_link_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Link Level!", return result); - - result = polaris10_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Graphics Level!", return result); - - result = polaris10_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Memory Level!", return result); - - result = polaris10_populate_smc_acpi_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize ACPI Level!", return result); - - result = polaris10_populate_smc_vce_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize VCE Level!", return result); - - result = polaris10_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize SAMU Level!", return result); - - /* Since only the initial state is completely set up at this point - * (the other states are just copies of the boot state) we only - * need to populate the ARB settings for the initial state. 
- */ - result = polaris10_program_memory_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to Write ARB settings for the initial state.", return result); - - result = polaris10_populate_smc_uvd_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize UVD Level!", return result); - - result = polaris10_populate_smc_boot_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Boot Level!", return result); - - result = polaris10_populate_smc_initailial_state(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize Boot State!", return result); - - result = polaris10_populate_bapm_parameters_in_dpm_table(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate BAPM Parameters!", return result); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ClockStretcher)) { - result = polaris10_populate_clock_stretcher_data_table(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate Clock Stretcher Data Table!", - return result); - } - - result = polaris10_populate_avfs_parameters(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;); - - table->CurrSclkPllRange = 0xff; - table->GraphicsVoltageChangeEnable = 1; - table->GraphicsThermThrottleEnable = 1; - table->GraphicsInterval = 1; - table->VoltageInterval = 1; - table->ThermalInterval = 1; - table->TemperatureLimitHigh = - table_info->cac_dtp_table->usTargetOperatingTemp * - SMU7_Q88_FORMAT_CONVERSION_UNIT; - table->TemperatureLimitLow = - (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * - SMU7_Q88_FORMAT_CONVERSION_UNIT; - table->MemoryVoltageChangeEnable = 1; - table->MemoryInterval = 1; - table->VoltageResponseTime = 0; - table->PhaseResponseTime = 0; - table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; - table->PCIeGenInterval = 1; - table->VRConfig = 0; - - result = polaris10_populate_vr_config(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate VRConfig setting!", return result); - - table->ThermGpio = 17; - table->SclkStepSize = 0x4000; - - if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID, &gpio_pin)) { - table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift; - } else { - table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_RegulatorHot); - } - - if (atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID, - &gpio_pin)) { - table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift; - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition); - } else { - table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition); - } - - /* Thermal Output GPIO */ - if (atomctrl_get_pp_assign_pin(hwmgr, THERMAL_INT_OUTPUT_GPIO_PINID, - &gpio_pin)) { - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ThermalOutGPIO); - - table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift; - - /* For porlarity read GPIOPAD_A with assigned Gpio pin - * since VBIOS will program this register to set 'inactive state', - * driver can then determine 'active state' from this and - * program SMU with correct polarity - */ - table->ThermOutPolarity = (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) - & (1 << gpio_pin.uc_gpio_pin_bit_shift))) ? 
1:0; - table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY; - - /* if required, combine VRHot/PCC with thermal out GPIO */ - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_RegulatorHot) - && phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_CombinePCCWithThermalSignal)) - table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT; - } else { - table->ThermOutGpio = 17; - table->ThermOutPolarity = 1; - table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE; - } - - /* Populate BIF_SCLK levels into SMC DPM table */ - for (i = 0; i <= hw_data->dpm_table.pcie_speed_table.count; i++) { - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, smu_data->bif_sclk_table[i], &dividers); - PP_ASSERT_WITH_CODE((result == 0), "Can not find DFS divide id for Sclk", return result); - - if (i == 0) - table->Ulv.BifSclkDfs = PP_HOST_TO_SMC_US((USHORT)(dividers.pll_post_divider)); - else - table->LinkLevel[i-1].BifSclkDfs = PP_HOST_TO_SMC_US((USHORT)(dividers.pll_post_divider)); - } - - for (i = 0; i < SMU74_MAX_ENTRIES_SMIO; i++) - table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); - - CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); - CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2); - CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); - CONVERT_FROM_HOST_TO_SMC_UL(table->CurrSclkPllRange); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); - CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); - CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); - - /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ - result = smu7_copy_bytes_to_smc(hwmgr, - smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, SystemFlags), - (uint8_t *)&(table->SystemFlags), - sizeof(SMU74_Discrete_DpmTable) - 3 * sizeof(SMU74_PIDController), - SMC_RAM_END); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to upload dpm data to SMC memory!", return result); - - result = polaris10_init_arb_table_index(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to upload arb data to SMC memory!", return result); - - result = polaris10_populate_pm_fuses(hwmgr); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to populate PM fuses to SMC memory!", return result); - - polaris10_save_default_power_profile(hwmgr); - - return 0; -} - -static int polaris10_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (data->need_update_smu7_dpm_table & - (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) - return polaris10_program_memory_timing_parameters(hwmgr); - - return 0; -} - -int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr) -{ - int ret; - struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) - return 0; - - ret = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting); - - ret = (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs) == 0) ? - 0 : -1; - - if (!ret) - /* If this param is not changed, this function could fire unnecessarily */ - smu_data->avfs.avfs_btc_status = AVFS_BTC_COMPLETED_PREVIOUSLY; - - return ret; -} - -/** -* Set up the fan table to control the fan using the SMC. 
-* @param hwmgr the address of the powerplay hardware manager. -* @param pInput the pointer to input data -* @param pOutput the pointer to output data -* @param pStorage the pointer to temporary storage -* @param Result the last failure code -* @return result from set temperature range routine -*/ -int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - SMU74_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; - uint32_t duty100; - uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; - uint16_t fdo_min, slope1, slope2; - uint32_t reference_clock; - int res; - uint64_t tmp64; - - if (hwmgr->thermal_controller.fanInfo.bNoFan) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - if (smu_data->smu7_data.fan_table_start == 0) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, - CG_FDO_CTRL1, FMAX_DUTY100); - - if (duty100 == 0) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - tmp64 = hwmgr->thermal_controller.advanceFanControlParameters. - usPWMMin * duty100; - do_div(tmp64, 10000); - fdo_min = (uint16_t)tmp64; - - t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - - hwmgr->thermal_controller.advanceFanControlParameters.usTMin; - t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - - hwmgr->thermal_controller.advanceFanControlParameters.usTMed; - - pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; - pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; - - slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); - slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); - - fan_table.TempMin = cpu_to_be16((50 + hwmgr-> - thermal_controller.advanceFanControlParameters.usTMin) / 100); - fan_table.TempMed = cpu_to_be16((50 + hwmgr-> - thermal_controller.advanceFanControlParameters.usTMed) / 100); - fan_table.TempMax = cpu_to_be16((50 + hwmgr-> - thermal_controller.advanceFanControlParameters.usTMax) / 100); - - fan_table.Slope1 = cpu_to_be16(slope1); - fan_table.Slope2 = cpu_to_be16(slope2); - - fan_table.FdoMin = cpu_to_be16(fdo_min); - - fan_table.HystDown = cpu_to_be16(hwmgr-> - thermal_controller.advanceFanControlParameters.ucTHyst); - - fan_table.HystUp = cpu_to_be16(1); - - fan_table.HystSlope = cpu_to_be16(1); - - fan_table.TempRespLim = cpu_to_be16(5); - - reference_clock = smu7_get_xclk(hwmgr); - - fan_table.RefreshPeriod = cpu_to_be32((hwmgr-> - thermal_controller.advanceFanControlParameters.ulCycleDelay * - reference_clock) / 1600); - - fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); - - fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD( - hwmgr->device, CGS_IND_REG__SMC, - CG_MULT_THERMAL_CTRL, TEMP_SEL); - - res = smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.fan_table_start, - (uint8_t *)&fan_table, (uint32_t)sizeof(fan_table), - SMC_RAM_END); - - if (!res && hwmgr->thermal_controller. - advanceFanControlParameters.ucMinimumPWMLimit) - res = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetFanMinPwm, - hwmgr->thermal_controller. 
- advanceFanControlParameters.ucMinimumPWMLimit); - - if (!res && hwmgr->thermal_controller. - advanceFanControlParameters.ulMinFanSCLKAcousticLimit) - res = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetFanSclkTarget, - hwmgr->thermal_controller. - advanceFanControlParameters.ulMinFanSCLKAcousticLimit); - - if (res) - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - - return 0; -} - -static int polaris10_update_uvd_smc_table(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - smu_data->smc_state_table.UvdBootLevel = 0; - if (table_info->mm_dep_table->count > 0) - smu_data->smc_state_table.UvdBootLevel = - (uint8_t) (table_info->mm_dep_table->count - 1); - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU74_Discrete_DpmTable, - UvdBootLevel); - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0x00FFFFFF; - mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_UVDDPM) || - phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_UVDDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); - return 0; -} - -static int polaris10_update_vce_smc_table(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smu_data->smc_state_table.VceBootLevel = - (uint8_t) (table_info->mm_dep_table->count - 1); - else - smu_data->smc_state_table.VceBootLevel = 0; - - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, VceBootLevel); - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFF00FFFF; - mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_VCEDPM_SetEnabledMask, - (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); - return 0; -} - -static int polaris10_update_samu_smc_table(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - - - smu_data->smc_state_table.SamuBootLevel = 0; - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, SamuBootLevel); - - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = 
cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFFFFFF00; - mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); - return 0; -} - - -static int polaris10_update_bif_smc_table(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; - int max_entry, i; - - max_entry = (SMU74_MAX_LEVELS_LINK < pcie_table->count) ? - SMU74_MAX_LEVELS_LINK : - pcie_table->count; - /* Setup BIF_SCLK levels */ - for (i = 0; i < max_entry; i++) - smu_data->bif_sclk_table[i] = pcie_table->entries[i].pcie_sclk; - return 0; -} - -int polaris10_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) -{ - switch (type) { - case SMU_UVD_TABLE: - polaris10_update_uvd_smc_table(hwmgr); - break; - case SMU_VCE_TABLE: - polaris10_update_vce_smc_table(hwmgr); - break; - case SMU_SAMU_TABLE: - polaris10_update_samu_smc_table(hwmgr); - break; - case SMU_BIF_TABLE: - polaris10_update_bif_smc_table(hwmgr); - default: - break; - } - return 0; -} - -int polaris10_update_sclk_threshold(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - - int result = 0; - uint32_t low_sclk_interrupt_threshold = 0; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; - low_sclk_interrupt_threshold = - data->low_sclk_interrupt_threshold; - - CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); - - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, - LowSclkInterruptThreshold), - (uint8_t *)&low_sclk_interrupt_threshold, - sizeof(uint32_t), - SMC_RAM_END); - } - PP_ASSERT_WITH_CODE((result == 0), - "Failed to update SCLK threshold!", return result); - - result = polaris10_program_mem_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE((result == 0), - "Failed to program memory timing parameters!", - ); - - return result; -} - -uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member) -{ - switch (type) { - case SMU_SoftRegisters: - switch (member) { - case HandshakeDisables: - return offsetof(SMU74_SoftRegisters, HandshakeDisables); - case VoltageChangeTimeout: - return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout); - case AverageGraphicsActivity: - return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity); - case PreVBlankGap: - return offsetof(SMU74_SoftRegisters, PreVBlankGap); - case VBlankTimeout: - return offsetof(SMU74_SoftRegisters, VBlankTimeout); - case UcodeLoadStatus: - return offsetof(SMU74_SoftRegisters, UcodeLoadStatus); - } - case SMU_Discrete_DpmTable: - switch (member) { - case UvdBootLevel: - return offsetof(SMU74_Discrete_DpmTable, UvdBootLevel); - case VceBootLevel: - 
return offsetof(SMU74_Discrete_DpmTable, VceBootLevel); - case SamuBootLevel: - return offsetof(SMU74_Discrete_DpmTable, SamuBootLevel); - case LowSclkInterruptThreshold: - return offsetof(SMU74_Discrete_DpmTable, LowSclkInterruptThreshold); - } - } - pr_warn("can't get the offset of type %x member %x\n", type, member); - return 0; -} - -uint32_t polaris10_get_mac_definition(uint32_t value) -{ - switch (value) { - case SMU_MAX_LEVELS_GRAPHICS: - return SMU74_MAX_LEVELS_GRAPHICS; - case SMU_MAX_LEVELS_MEMORY: - return SMU74_MAX_LEVELS_MEMORY; - case SMU_MAX_LEVELS_LINK: - return SMU74_MAX_LEVELS_LINK; - case SMU_MAX_ENTRIES_SMIO: - return SMU74_MAX_ENTRIES_SMIO; - case SMU_MAX_LEVELS_VDDC: - return SMU74_MAX_LEVELS_VDDC; - case SMU_MAX_LEVELS_VDDGFX: - return SMU74_MAX_LEVELS_VDDGFX; - case SMU_MAX_LEVELS_VDDCI: - return SMU74_MAX_LEVELS_VDDCI; - case SMU_MAX_LEVELS_MVDD: - return SMU74_MAX_LEVELS_MVDD; - case SMU_UVD_MCLK_HANDSHAKE_DISABLE: - return SMU7_UVD_MCLK_HANDSHAKE_DISABLE; - } - - pr_warn("can't get the mac of %x\n", value); - return 0; -} - -/** -* Get the location of various tables inside the FW image. -* -* @param hwmgr the address of the powerplay hardware manager. -* @return always 0 -*/ -int polaris10_process_firmware_header(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t tmp; - int result; - bool error = false; - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU74_Firmware_Header, DpmTable), - &tmp, SMC_RAM_END); - - if (0 == result) - smu_data->smu7_data.dpm_table_start = tmp; - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU74_Firmware_Header, SoftRegisters), - &tmp, SMC_RAM_END); - - if (!result) { - data->soft_regs_start = tmp; - smu_data->smu7_data.soft_regs_start = tmp; - } - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU74_Firmware_Header, mcRegisterTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.mc_reg_table_start = tmp; - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU74_Firmware_Header, FanTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.fan_table_start = tmp; - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU74_Firmware_Header, mcArbDramTimingTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.arb_table_start = tmp; - - error |= (0 != result); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU74_Firmware_Header, Version), - &tmp, SMC_RAM_END); - - if (!result) - hwmgr->microcode_version_info.SMC = tmp; - - error |= (0 != result); - - return error ? -1 : 0; -} - -bool polaris10_is_dpm_running(struct pp_hwmgr *hwmgr) -{ - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? 
true : false; -} - -int polaris10_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, - struct amd_pp_profile *request) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *) - (hwmgr->smu_backend); - struct SMU74_Discrete_GraphicsLevel *levels = - smu_data->smc_state_table.GraphicsLevel; - uint32_t array = smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, GraphicsLevel); - uint32_t array_size = sizeof(struct SMU74_Discrete_GraphicsLevel) * - SMU74_MAX_LEVELS_GRAPHICS; - uint32_t i; - - for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { - levels[i].ActivityLevel = - cpu_to_be16(request->activity_threshold); - levels[i].EnabledForActivity = 1; - levels[i].UpHyst = request->up_hyst; - levels[i].DownHyst = request->down_hyst; - } - - return smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, - array_size, SMC_RAM_END); -} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 61ee6281fbda..bd6be7793ca7 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -35,13 +35,47 @@ #include "gca/gfx_8_0_d.h" #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" -#include "polaris10_pwrvirus.h" #include "ppatomctrl.h" #include "cgs_common.h" -#include "polaris10_smc.h" #include "smu7_ppsmc.h" #include "smu7_smumgr.h" +#include "smu7_dyn_defaults.h" + +#include "smu7_hwmgr.h" +#include "hardwaremanager.h" +#include "ppatomctrl.h" +#include "atombios.h" +#include "pppcielanes.h" + +#include "dce/dce_10_0_d.h" +#include "dce/dce_10_0_sh_mask.h" + +#define POLARIS10_SMC_SIZE 0x20000 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 +#define POWERTUNE_DEFAULT_SET_MAX 1 +#define VDDC_VDDCI_DELTA 200 +#define MC_CG_ARB_FREQ_F1 0x0b + +static const struct polaris10_pt_defaults polaris10_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { + /* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */ + { 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, + { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61}, + { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } }, +}; + +static const sclkFcwRange_t Range_Table[NUM_SCLK_RANGE] = { + {VCO_2_4, POSTDIV_DIV_BY_16, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_16, 112, 224, 160}, + {VCO_2_4, POSTDIV_DIV_BY_8, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_8, 112, 224, 160}, + {VCO_2_4, POSTDIV_DIV_BY_4, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_4, 112, 216, 160}, + {VCO_2_4, POSTDIV_DIV_BY_2, 75, 160, 108}, + {VCO_3_6, POSTDIV_DIV_BY_2, 112, 216, 160} }; + #define PPPOLARIS10_TARGETACTIVITY_DFLT 50 static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = { @@ -60,46 +94,6 @@ static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = { static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 = { 0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00}; -static void execute_pwr_table(struct pp_hwmgr *hwmgr, const PWR_Command_Table *pvirus, int size) -{ - int i; - uint32_t reg, data; - - for (i = 0; i < size; i++) { - reg = pvirus->reg; - data = pvirus->data; - if (reg != 0xffffffff) - cgs_write_register(hwmgr->device, reg, data); - else - break; - 
pvirus++; - } -} - -static void execute_pwr_dfy_table(struct pp_hwmgr *hwmgr, const PWR_DFY_Section *section) -{ - int i; - cgs_write_register(hwmgr->device, mmCP_DFY_CNTL, section->dfy_cntl); - cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_HI, section->dfy_addr_hi); - cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_LO, section->dfy_addr_lo); - for (i = 0; i < section->dfy_size; i++) - cgs_write_register(hwmgr->device, mmCP_DFY_DATA_0, section->dfy_data[i]); -} - -static int polaris10_setup_pwr_virus(struct pp_hwmgr *hwmgr) -{ - execute_pwr_table(hwmgr, pwr_virus_table_pre, ARRAY_SIZE(pwr_virus_table_pre)); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section1); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section2); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section3); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section4); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section5); - execute_pwr_dfy_table(hwmgr, &pwr_virus_section6); - execute_pwr_table(hwmgr, pwr_virus_table_post, ARRAY_SIZE(pwr_virus_table_post)); - - return 0; -} - static int polaris10_perform_btc(struct pp_hwmgr *hwmgr) { int result = 0; @@ -197,7 +191,7 @@ polaris10_avfs_event_mgr(struct pp_hwmgr *hwmgr, bool SMU_VFT_INTACT) if (smu_data->avfs.avfs_btc_param > 1) { pr_info("[AVFS][Polaris10_AVFSEventMgr] AC BTC has not been successfully verified on Fiji. There may be in this setting."); smu_data->avfs.avfs_btc_status = AVFS_BTC_VIRUS_FAIL; - PP_ASSERT_WITH_CODE(0 == polaris10_setup_pwr_virus(hwmgr), + PP_ASSERT_WITH_CODE(0 == smu7_setup_pwr_virus(hwmgr), "[AVFS][Polaris10_AVFSEventMgr] Could not setup Pwr Virus for AVFS ", return -EINVAL); } @@ -389,6 +383,2195 @@ static int polaris10_smu_init(struct pp_hwmgr *hwmgr) return 0; } +static int polaris10_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table, + uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd) +{ + uint32_t i; + uint16_t vddci; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + *voltage = *mvdd = 0; + + /* clock - voltage dependency table is empty table */ + if (dep_table->count == 0) + return -EINVAL; + + for (i = 0; i < dep_table->count; i++) { + /* find first sclk bigger than request */ + if (dep_table->entries[i].clk >= clock) { + *voltage |= (dep_table->entries[i].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i].vddci) + *voltage |= (dep_table->entries[i].vddci * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else { + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i].vddc - + (uint16_t)VDDC_VDDCI_DELTA)); + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + } + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i].mvdd * + VOLTAGE_SCALE; + + *voltage |= 1 << PHASES_SHIFT; + return 0; + } + } + + /* sclk is bigger than max sclk in the dependence table */ + *voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i-1].vddci) { + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i].vddc - + 
(uint16_t)VDDC_VDDCI_DELTA)); + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + } + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE; + + return 0; +} + +static uint16_t scale_fan_gain_settings(uint16_t raw_setting) +{ + uint32_t tmp; + tmp = raw_setting * 4096 / 100; + return (uint16_t)tmp; +} + +static int polaris10_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + + const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; + SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; + struct pp_advance_fan_control_parameters *fan_table = + &hwmgr->thermal_controller.advanceFanControlParameters; + int i, j, k; + const uint16_t *pdef1; + const uint16_t *pdef2; + + table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); + table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); + + PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, + "Target Operating Temp is out of Range!", + ); + + table->TemperatureLimitEdge = PP_HOST_TO_SMC_US( + cac_dtp_table->usTargetOperatingTemp * 256); + table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitHotspot * 256); + table->FanGainEdge = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainEdge)); + table->FanGainHotspot = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainHotspot)); + + pdef1 = defaults->BAPMTI_R; + pdef2 = defaults->BAPMTI_RC; + + for (i = 0; i < SMU74_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU74_DTE_SOURCES; j++) { + for (k = 0; k < SMU74_DTE_SINKS; k++) { + table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*pdef1); + table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*pdef2); + pdef1++; + pdef2++; + } + } + } + + return 0; +} + +static int polaris10_populate_svi_load_line(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; + + smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn; + smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC; + smu_data->power_tune_table.SviLoadLineTrimVddC = 3; + smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int polaris10_populate_tdc_limit(struct pp_hwmgr *hwmgr) +{ + uint16_t tdc_limit; + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; + + tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128); + smu_data->power_tune_table.TDC_VDDC_PkgLimit = + CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); + smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + defaults->TDC_VDDC_ThrottleReleaseLimitPerc; + smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt; + + return 0; +} + +static int polaris10_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) +{ + struct polaris10_smumgr *smu_data = 
(struct polaris10_smumgr *)(hwmgr->smu_backend); + const struct polaris10_pt_defaults *defaults = smu_data->power_tune_defaults; + uint32_t temp; + + if (smu7_read_smc_sram_dword(hwmgr, + fuse_table_offset + + offsetof(SMU74_Discrete_PmFuses, TdcWaterfallCtl), + (uint32_t *)&temp, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", + return -EINVAL); + else { + smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl; + smu_data->power_tune_table.LPMLTemperatureMin = + (uint8_t)((temp >> 16) & 0xff); + smu_data->power_tune_table.LPMLTemperatureMax = + (uint8_t)((temp >> 8) & 0xff); + smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff); + } + return 0; +} + +static int polaris10_populate_temperature_scaler(struct pp_hwmgr *hwmgr) +{ + int i; + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; + + return 0; +} + +static int polaris10_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + +/* TO DO move to hwmgr */ + if ((hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity & (1 << 15)) + || 0 == hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity) + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity = + hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity; + + smu_data->power_tune_table.FuzzyFan_PwmSetDelta = PP_HOST_TO_SMC_US( + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity); + return 0; +} + +static int polaris10_populate_gnb_lpml(struct pp_hwmgr *hwmgr) +{ + int i; + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. 
*/ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.GnbLPML[i] = 0; + + return 0; +} + +static int polaris10_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; + uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; + struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; + + hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); + lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); + + smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = + CONVERT_FROM_HOST_TO_SMC_US(hi_sidd); + smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = + CONVERT_FROM_HOST_TO_SMC_US(lo_sidd); + + return 0; +} + +static int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + uint32_t pm_fuse_table_offset; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + if (smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU74_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to get pm_fuse_table_offset Failed!", + return -EINVAL); + + if (polaris10_populate_svi_load_line(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate SviLoadLine Failed!", + return -EINVAL); + + if (polaris10_populate_tdc_limit(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TDCLimit Failed!", return -EINVAL); + + if (polaris10_populate_dw8(hwmgr, pm_fuse_table_offset)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TdcWaterfallCtl, " + "LPMLTemperature Min and Max Failed!", + return -EINVAL); + + if (0 != polaris10_populate_temperature_scaler(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate LPMLTemperatureScaler Failed!", + return -EINVAL); + + if (polaris10_populate_fuzzy_fan(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate Fuzzy Fan Control parameters Failed!", + return -EINVAL); + + if (polaris10_populate_gnb_lpml(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Failed!", + return -EINVAL); + + if (polaris10_populate_bapm_vddc_base_leakage_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate BapmVddCBaseLeakage Hi and Lo " + "Sidd Failed!", return -EINVAL); + + if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, + (uint8_t *)&smu_data->power_tune_table, + (sizeof(struct SMU74_Discrete_PmFuses) - 92), SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to download PmFuseTable Failed!", + return -EINVAL); + } + return 0; +} + +static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, + SMU74_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t count, level; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + count = data->mvdd_voltage_table.count; + if (count > SMU_MAX_SMIO_LEVELS) + count = SMU_MAX_SMIO_LEVELS; + for (level = 0; level < count; level++) { + table->SmioTable2.Pattern[level].Voltage = + PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + /* Index into DpmTable.Smio. 
Drive bits from Smio entry to get this voltage level.*/ + table->SmioTable2.Pattern[level].Smio = + (uint8_t) level; + table->Smio[level] |= + data->mvdd_voltage_table.entries[level].smio_low; + } + table->SmioMask2 = data->mvdd_voltage_table.mask_low; + + table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count); + } + + return 0; +} + +static int polaris10_populate_smc_vddci_table(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + uint32_t count, level; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + count = data->vddci_voltage_table.count; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + if (count > SMU_MAX_SMIO_LEVELS) + count = SMU_MAX_SMIO_LEVELS; + for (level = 0; level < count; ++level) { + table->SmioTable1.Pattern[level].Voltage = + PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[level].value * VOLTAGE_SCALE); + table->SmioTable1.Pattern[level].Smio = (uint8_t) level; + + table->Smio[level] |= data->vddci_voltage_table.entries[level].smio_low; + } + } + + table->SmioMask1 = data->vddci_voltage_table.mask_low; + + return 0; +} + +static int polaris10_populate_cac_table(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + uint32_t count; + uint8_t index; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_voltage_lookup_table *lookup_table = + table_info->vddc_lookup_table; + /* tables is already swapped, so in order to use the value from it, + * we need to swap it back. + * We are populating vddc CAC data to BapmVddc table + * in split and merged mode + */ + for (count = 0; count < lookup_table->count; count++) { + index = phm_get_voltage_index(lookup_table, + data->vddc_voltage_table.entries[count].value); + table->BapmVddcVidLoSidd[count] = convert_to_vid(lookup_table->entries[index].us_cac_low); + table->BapmVddcVidHiSidd[count] = convert_to_vid(lookup_table->entries[index].us_cac_mid); + table->BapmVddcVidHiSidd2[count] = convert_to_vid(lookup_table->entries[index].us_cac_high); + } + + return 0; +} + +static int polaris10_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + polaris10_populate_smc_vddci_table(hwmgr, table); + polaris10_populate_smc_mvdd_table(hwmgr, table); + polaris10_populate_cac_table(hwmgr, table); + + return 0; +} + +static int polaris10_populate_ulv_level(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_Ulv *state) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; + state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * + VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + + if (hwmgr->chip_id == CHIP_POLARIS12 || hwmgr->is_kicker) + state->VddcPhase = data->vddc_phase_shed_control ^ 0x3; + else + state->VddcPhase = (data->vddc_phase_shed_control) ? 
0 : 1; + + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); + + return 0; +} + +static int polaris10_populate_ulv_state(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + return polaris10_populate_ulv_level(hwmgr, &table->Ulv); +} + +static int polaris10_populate_smc_link_level(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + int i; + + /* Index (dpm_table->pcie_speed_table.count) + * is reserved for PCIE boot level. */ + for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width( + dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = 1; + table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff); + table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5); + table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30); + } + + smu_data->smc_state_table.LinkLevelCount = + (uint8_t)dpm_table->pcie_speed_table.count; + +/* To Do move to hwmgr */ + data->dpm_level_enable_mask.pcie_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); + + return 0; +} + + +static void polaris10_get_sclk_range_table(struct pp_hwmgr *hwmgr, + SMU74_Discrete_DpmTable *table) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + uint32_t i, ref_clk; + + struct pp_atom_ctrl_sclk_range_table range_table_from_vbios = { { {0} } }; + + ref_clk = smu7_get_xclk(hwmgr); + + if (0 == atomctrl_get_smc_sclk_range_table(hwmgr, &range_table_from_vbios)) { + for (i = 0; i < NUM_SCLK_RANGE; i++) { + table->SclkFcwRangeTable[i].vco_setting = range_table_from_vbios.entry[i].ucVco_setting; + table->SclkFcwRangeTable[i].postdiv = range_table_from_vbios.entry[i].ucPostdiv; + table->SclkFcwRangeTable[i].fcw_pcc = range_table_from_vbios.entry[i].usFcw_pcc; + + table->SclkFcwRangeTable[i].fcw_trans_upper = range_table_from_vbios.entry[i].usFcw_trans_upper; + table->SclkFcwRangeTable[i].fcw_trans_lower = range_table_from_vbios.entry[i].usRcw_trans_lower; + + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); + } + return; + } + + for (i = 0; i < NUM_SCLK_RANGE; i++) { + smu_data->range_table[i].trans_lower_frequency = (ref_clk * Range_Table[i].fcw_trans_lower) >> Range_Table[i].postdiv; + smu_data->range_table[i].trans_upper_frequency = (ref_clk * Range_Table[i].fcw_trans_upper) >> Range_Table[i].postdiv; + + table->SclkFcwRangeTable[i].vco_setting = Range_Table[i].vco_setting; + table->SclkFcwRangeTable[i].postdiv = Range_Table[i].postdiv; + table->SclkFcwRangeTable[i].fcw_pcc = Range_Table[i].fcw_pcc; + + table->SclkFcwRangeTable[i].fcw_trans_upper = Range_Table[i].fcw_trans_upper; + table->SclkFcwRangeTable[i].fcw_trans_lower = Range_Table[i].fcw_trans_lower; + + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); + 
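/* Editor's note, not part of the patch: a minimal sketch of the fallback
 * frequency maths used just above when no SCLK range table is read from the
 * VBIOS. Each transition frequency is a hard-coded FCW value from Range_Table
 * scaled by the reference clock from smu7_get_xclk() and shifted right by the
 * post-divider field. The helper name is hypothetical. */
static uint32_t example_sclk_trans_freq(uint32_t ref_clk, uint32_t fcw, uint8_t postdiv_shift)
{
	/* mirrors: (ref_clk * Range_Table[i].fcw_trans_*) >> Range_Table[i].postdiv */
	return (ref_clk * fcw) >> postdiv_shift;
}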
CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); + } +} + +static int polaris10_calculate_sclk_params(struct pp_hwmgr *hwmgr, + uint32_t clock, SMU_SclkSetting *sclk_setting) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + const SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); + struct pp_atomctrl_clock_dividers_ai dividers; + uint32_t ref_clock; + uint32_t pcc_target_percent, pcc_target_freq, ss_target_percent, ss_target_freq; + uint8_t i; + int result; + uint64_t temp; + + sclk_setting->SclkFrequency = clock; + /* get the engine clock dividers for this clock value */ + result = atomctrl_get_engine_pll_dividers_ai(hwmgr, clock, &dividers); + if (result == 0) { + sclk_setting->Fcw_int = dividers.usSclk_fcw_int; + sclk_setting->Fcw_frac = dividers.usSclk_fcw_frac; + sclk_setting->Pcc_fcw_int = dividers.usPcc_fcw_int; + sclk_setting->PllRange = dividers.ucSclkPllRange; + sclk_setting->Sclk_slew_rate = 0x400; + sclk_setting->Pcc_up_slew_rate = dividers.usPcc_fcw_slew_frac; + sclk_setting->Pcc_down_slew_rate = 0xffff; + sclk_setting->SSc_En = dividers.ucSscEnable; + sclk_setting->Fcw1_int = dividers.usSsc_fcw1_int; + sclk_setting->Fcw1_frac = dividers.usSsc_fcw1_frac; + sclk_setting->Sclk_ss_slew_rate = dividers.usSsc_fcw_slew_frac; + return result; + } + + ref_clock = smu7_get_xclk(hwmgr); + + for (i = 0; i < NUM_SCLK_RANGE; i++) { + if (clock > smu_data->range_table[i].trans_lower_frequency + && clock <= smu_data->range_table[i].trans_upper_frequency) { + sclk_setting->PllRange = i; + break; + } + } + + sclk_setting->Fcw_int = (uint16_t)((clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / ref_clock); + temp = clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; + temp <<= 0x10; + do_div(temp, ref_clock); + sclk_setting->Fcw_frac = temp & 0xffff; + + pcc_target_percent = 10; /* Hardcode 10% for now. */ + pcc_target_freq = clock - (clock * pcc_target_percent / 100); + sclk_setting->Pcc_fcw_int = (uint16_t)((pcc_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / ref_clock); + + ss_target_percent = 2; /* Hardcode 2% for now.
*/ + sclk_setting->SSc_En = 0; + if (ss_target_percent) { + sclk_setting->SSc_En = 1; + ss_target_freq = clock - (clock * ss_target_percent / 100); + sclk_setting->Fcw1_int = (uint16_t)((ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / ref_clock); + temp = ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; + temp <<= 0x10; + do_div(temp, ref_clock); + sclk_setting->Fcw1_frac = temp & 0xffff; + } + + return 0; +} + +static int polaris10_populate_single_graphic_level(struct pp_hwmgr *hwmgr, + uint32_t clock, uint16_t sclk_al_threshold, + struct SMU74_Discrete_GraphicsLevel *level) +{ + int result; + /* PP_Clocks minClocks; */ + uint32_t mvdd; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + SMU_SclkSetting curr_sclk_setting = { 0 }; + + result = polaris10_calculate_sclk_params(hwmgr, clock, &curr_sclk_setting); + + /* populate graphics levels */ + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, clock, + &level->MinVoltage, &mvdd); + + PP_ASSERT_WITH_CODE((0 == result), + "can not find VDDC voltage value for " + "VDDC engine clock dependency table", + return result); + level->ActivityLevel = sclk_al_threshold; + + level->CcPwrDynRm = 0; + level->CcPwrDynRm1 = 0; + level->EnabledForActivity = 0; + level->EnabledForThrottle = 1; + level->UpHyst = 10; + level->DownHyst = 0; + level->VoltageDownHyst = 0; + level->PowerThrottle = 0; + data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) + level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock, + hwmgr->display_config.min_core_set_clock_in_sr); + + /* Default to slow, highest DPM level will be + * set to PPSMC_DISPLAY_WATERMARK_LOW later. 
+ */ + if (data->update_up_hyst) + level->UpHyst = (uint8_t)data->up_hyst; + if (data->update_down_hyst) + level->DownHyst = (uint8_t)data->down_hyst; + + level->SclkSetting = curr_sclk_setting; + + CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(level->SclkSetting.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_up_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_down_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_frac); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_ss_slew_rate); + return 0; +} + +static int polaris10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; + uint8_t pcie_entry_cnt = (uint8_t) hw_data->dpm_table.pcie_speed_table.count; + int result = 0; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU74_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU74_Discrete_GraphicsLevel) * + SMU74_MAX_LEVELS_GRAPHICS; + struct SMU74_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t i, max_entry; + uint8_t hightest_pcie_level_enabled = 0, + lowest_pcie_level_enabled = 0, + mid_pcie_level_enabled = 0, + count = 0; + + polaris10_get_sclk_range_table(hwmgr, &(smu_data->smc_state_table)); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + + result = polaris10_populate_single_graphic_level(hwmgr, + dpm_table->sclk_table.dpm_levels[i].value, + (uint16_t)smu_data->activity_target[i], + &(smu_data->smc_state_table.GraphicsLevel[i])); + if (result) + return result; + + /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ + if (i > 1) + levels[i].DeepSleepDivId = 0; + } + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SPLLShutdownSupport)) + smu_data->smc_state_table.GraphicsLevel[0].SclkSetting.SSc_En = 0; + + smu_data->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1; + smu_data->smc_state_table.GraphicsDpmLevelCount = + (uint8_t)dpm_table->sclk_table.count; + hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + + if (pcie_table != NULL) { + PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt), + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + max_entry = pcie_entry_cnt - 1; + for (i = 0; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = + (uint8_t) ((i < max_entry) ? 
i : max_entry); + } else { + while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (hightest_pcie_level_enabled + 1))) != 0)) + hightest_pcie_level_enabled++; + + while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << lowest_pcie_level_enabled)) == 0)) + lowest_pcie_level_enabled++; + + while ((count < hightest_pcie_level_enabled) && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) + count++; + + mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) < + hightest_pcie_level_enabled ? + (lowest_pcie_level_enabled + 1 + count) : + hightest_pcie_level_enabled; + + /* set pcieDpmLevel to hightest_pcie_level_enabled */ + for (i = 2; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = hightest_pcie_level_enabled; + + /* set pcieDpmLevel to lowest_pcie_level_enabled */ + levels[0].pcieDpmLevel = lowest_pcie_level_enabled; + + /* set pcieDpmLevel to mid_pcie_level_enabled */ + levels[1].pcieDpmLevel = mid_pcie_level_enabled; + } + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + + +static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU74_Discrete_MemoryLevel *mem_level) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + int result = 0; + struct cgs_display_info info = {0, 0, NULL}; + uint32_t mclk_stutter_mode_threshold = 40000; + + cgs_get_active_displays_info(hwmgr->device, &info); + + if (table_info->vdd_dep_on_mclk) { + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, clock, + &mem_level->MinVoltage, &mem_level->MinMvdd); + PP_ASSERT_WITH_CODE((0 == result), + "can not find MinVddc voltage value from memory " + "VDDC voltage dependency table", return result); + } + + mem_level->MclkFrequency = clock; + mem_level->EnabledForThrottle = 1; + mem_level->EnabledForActivity = 0; + mem_level->UpHyst = 0; + mem_level->DownHyst = 100; + mem_level->VoltageDownHyst = 0; + mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + mem_level->StutterEnable = false; + mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + data->display_timing.num_existing_displays = info.display_count; + + if (mclk_stutter_mode_threshold && + (clock <= mclk_stutter_mode_threshold) && + (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, + STUTTER_ENABLE) & 0x1)) + mem_level->StutterEnable = true; + + if (!result) { + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage); + } + return result; +} + +static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; + int result; + /* populate MCLK dpm table to SMU7 */ + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU74_Discrete_DpmTable, MemoryLevel); + uint32_t array_size 
= sizeof(SMU74_Discrete_MemoryLevel) * + SMU74_MAX_LEVELS_MEMORY; + struct SMU74_Discrete_MemoryLevel *levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), + "can not populate memory level as memory clock is zero", + return -EINVAL); + result = polaris10_populate_single_memory_level(hwmgr, + dpm_table->mclk_table.dpm_levels[i].value, + &levels[i]); + if (i == dpm_table->mclk_table.count - 1) { + levels[i].DisplayWatermark = PPSMC_DISPLAY_WATERMARK_HIGH; + levels[i].EnabledForActivity = 1; + } + if (result) + return result; + } + + /* In order to prevent MC activity from stutter mode to push DPM up, + * the UVD change complements this by putting the MCLK in + * a higher state by default such that we are not affected by + * up threshold or and MCLK DPM latency. + */ + levels[0].ActivityLevel = 0x1f; + CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel); + + smu_data->smc_state_table.MemoryDpmLevelCount = + (uint8_t)dpm_table->mclk_table.count; + hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + +static int polaris10_populate_mvdd_value(struct pp_hwmgr *hwmgr, + uint32_t mclk, SMIO_Pattern *smio_pat) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i = 0; + + if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { + /* find mvdd value which clock is more than request */ + for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { + if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { + smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value; + break; + } + } + PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, + "MVDD Voltage is outside the supported range.", + return -EINVAL); + } else + return -EINVAL; + + return 0; +} + +static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, + SMU74_Discrete_DpmTable *table) +{ + int result = 0; + uint32_t sclk_frequency; + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + SMIO_Pattern vol_level; + uint32_t mvdd; + uint16_t us_mvdd; + + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + /* Get MinVoltage and Frequency from DPM0, + * already converted to SMC_UL */ + sclk_frequency = data->vbios_boot_state.sclk_bootup_value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, + sclk_frequency, + &table->ACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDC voltage value " + "in Clock Dependency Table", + ); + + result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency, &(table->ACPILevel.SclkSetting)); + PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result); + + table->ACPILevel.DeepSleepDivId = 0; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage); + 
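/* Editor's note, not part of the patch: a minimal sketch of the MinVoltage
 * packing these populate helpers rely on. VDDC and VDDCI are scaled by
 * VOLTAGE_SCALE, placed into separate bit fields of one 32-bit word via
 * VDDC_SHIFT and VDDCI_SHIFT, and a phase bit is set via PHASES_SHIFT; the
 * macro values live elsewhere in the smu7 headers. The helper name is
 * hypothetical. */
static uint32_t example_pack_min_voltage(uint16_t vddc, uint16_t vddci)
{
	uint32_t min_voltage = 0;

	min_voltage |= (uint32_t)(vddc * VOLTAGE_SCALE) << VDDC_SHIFT;   /* core rail */
	min_voltage |= (uint32_t)(vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; /* I/O rail */
	min_voltage |= 1 << PHASES_SHIFT;                                /* phase bit */
	return min_voltage;
}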
CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); + + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkSetting.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_up_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_down_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate); + + + /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ + table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, + table->MemoryACPILevel.MclkFrequency, + &table->MemoryACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDCI voltage value " + "in Clock Dependency Table", + ); + + us_mvdd = 0; + if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) || + (data->mclk_dpm_key_disabled)) + us_mvdd = data->vbios_boot_state.mvdd_bootup_value; + else { + if (!polaris10_populate_mvdd_value(hwmgr, + data->dpm_table.mclk_table.dpm_levels[0].value, + &vol_level)) + us_mvdd = vol_level.Voltage; + } + + if (0 == polaris10_populate_mvdd_value(hwmgr, 0, &vol_level)) + table->MemoryACPILevel.MinMvdd = PP_HOST_TO_SMC_UL(vol_level.Voltage); + else + table->MemoryACPILevel.MinMvdd = 0; + + table->MemoryACPILevel.StutterEnable = false; + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpHyst = 0; + table->MemoryACPILevel.DownHyst = 100; + table->MemoryACPILevel.VoltageDownHyst = 0; + table->MemoryACPILevel.ActivityLevel = + PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage); + + return result; +} + +static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, + SMU74_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t vddci; + + table->VceLevelCount = (uint8_t)(mm_table->count); + table->VceBootLevel = 0; + + for (count = 0; count < table->VceLevelCount; count++) { + table->VceLevel[count].Frequency = mm_table->entries[count].eclk; + table->VceLevel[count].MinVoltage = 0; + table->VceLevel[count].MinVoltage |= + (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * 
VOLTAGE_SCALE) << VDDCI_SHIFT; + + + table->VceLevel[count].MinVoltage |= + (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /*retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->VceLevel[count].Frequency, &dividers); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for VCE engine clock", + return result); + + table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage); + } + return result; +} + + +static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, + SMU74_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t vddci; + + table->SamuBootLevel = 0; + table->SamuLevelCount = (uint8_t)(mm_table->count); + + for (count = 0; count < table->SamuLevelCount; count++) { + /* not sure whether we need evclk or not */ + table->SamuLevel[count].MinVoltage = 0; + table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; + table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->SamuLevel[count].Frequency, &dividers); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for samu clock", return result); + + table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); + } + return result; +} + +static int polaris10_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, + int32_t eng_clock, int32_t mem_clock, + SMU74_Discrete_MCArbDramTimingTableEntry *arb_regs) +{ + uint32_t dram_timing; + uint32_t dram_timing2; + uint32_t burst_time; + int result; + + result = atomctrl_set_engine_dram_timings_rv770(hwmgr, + eng_clock, mem_clock); + PP_ASSERT_WITH_CODE(result == 0, + "Error calling VBIOS to set DRAM_TIMING.", return result); + + dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); + dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); + burst_time = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); + + + arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dram_timing); + arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2); + arb_regs->McArbBurstTime = (uint8_t)burst_time; + + return 0; +} + +static int polaris10_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr
*)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct SMU74_Discrete_MCArbDramTimingTable arb_regs; + uint32_t i, j; + int result = 0; + + for (i = 0; i < hw_data->dpm_table.sclk_table.count; i++) { + for (j = 0; j < hw_data->dpm_table.mclk_table.count; j++) { + result = polaris10_populate_memory_timing_parameters(hwmgr, + hw_data->dpm_table.sclk_table.dpm_levels[i].value, + hw_data->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + if (result == 0) + result = atomctrl_set_ac_timing_ai(hwmgr, hw_data->dpm_table.mclk_table.dpm_levels[j].value, j); + if (result != 0) + return result; + } + } + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.arb_table_start, + (uint8_t *)&arb_regs, + sizeof(SMU74_Discrete_MCArbDramTimingTable), + SMC_RAM_END); + return result; +} + +static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t vddci; + + table->UvdLevelCount = (uint8_t)(mm_table->count); + table->UvdBootLevel = 0; + + for (count = 0; count < table->UvdLevelCount; count++) { + table->UvdLevel[count].MinVoltage = 0; + table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk; + table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; + table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->UvdLevel[count].VclkFrequency, &dividers); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for Vclk clock", return result); + + table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider; + + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->UvdLevel[count].DclkFrequency, &dividers); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for Dclk clock", return result); + + table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); + } + + return result; +} + +static int polaris10_populate_smc_boot_level(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + int result = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + /* find boot level from dpm table */ + result = phm_find_boot_level(&(data->dpm_table.sclk_table), +
data->vbios_boot_state.sclk_bootup_value, + (uint32_t *)&(table->GraphicsBootLevel)); + + result = phm_find_boot_level(&(data->dpm_table.mclk_table), + data->vbios_boot_state.mclk_bootup_value, + (uint32_t *)&(table->MemoryBootLevel)); + + table->BootVddc = data->vbios_boot_state.vddc_bootup_value * + VOLTAGE_SCALE; + table->BootVddci = data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE; + table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc); + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci); + CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); + + return 0; +} + +static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint8_t count, level; + + count = (uint8_t)(table_info->vdd_dep_on_sclk->count); + + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_sclk->entries[level].clk >= + hw_data->vbios_boot_state.sclk_bootup_value) { + smu_data->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + count = (uint8_t)(table_info->vdd_dep_on_mclk->count); + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_mclk->entries[level].clk >= + hw_data->vbios_boot_state.mclk_bootup_value) { + smu_data->smc_state_table.MemoryBootLevel = level; + break; + } + } + + return 0; +} + +static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) +{ + uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min; + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + + uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + + stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; + + /* Read SMU_Eefuse to read and calculate RO and determine + * if the part is SS or FF. if RO >= 1660MHz, part is FF. 
+ */ + efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_EFUSE_0 + (67 * 4)); + efuse &= 0xFF000000; + efuse = efuse >> 24; + + if (hwmgr->chip_id == CHIP_POLARIS10) { + min = 1000; + max = 2300; + } else { + min = 1100; + max = 2100; + } + + ro = efuse * (max - min) / 255 + min; + + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ + for (i = 0; i < sclk_table->count; i++) { + smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= + sclk_table->entries[i].cks_enable << i; + if (hwmgr->chip_id == CHIP_POLARIS10) { + volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * 136418 - (ro - 70) * 1000000) / \ + (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); + volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * 3232 - (ro - 65) * 1000000) / \ + (2522480 - sclk_table->entries[i].clk/100 * 115764/100)); + } else { + volt_without_cks = (uint32_t)((2416794800U + (sclk_table->entries[i].clk/100) * 1476925/10 - (ro - 50) * 1000000) / \ + (2625416 - (sclk_table->entries[i].clk/100) * (12586807/10000))); + volt_with_cks = (uint32_t)((2999656000U - sclk_table->entries[i].clk/100 * 392803 - (ro - 44) * 1000000) / \ + (3422454 - sclk_table->entries[i].clk/100 * (18886376/10000))); + } + + if (volt_without_cks >= volt_with_cks) + volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 + 624) / 625); + + smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; + } + + smu_data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6; + /* Populate CKS Lookup Table */ + if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) + stretch_amount2 = 0; + else if (stretch_amount == 3 || stretch_amount == 4) + stretch_amount2 = 1; + else { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher); + PP_ASSERT_WITH_CODE(false, + "Stretch Amount in PPTable not supported\n", + return -EINVAL); + } + + value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); + value &= 0xFFFFFFFE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); + + return 0; +} + +static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr, + struct SMU74_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + uint16_t config; + + config = VR_MERGED_WITH_VDDC; + table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT); + + /* Set Vddc Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + config = VR_SVI2_PLANE_1; + table->VRConfig |= config; + } else { + PP_ASSERT_WITH_CODE(false, + "VDDC should be on SVI2 control in merged mode!", + ); + } + /* Set Vddci Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { + config = VR_SVI2_PLANE_2; /* only in merged mode */ + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + config = VR_SMIO_PATTERN_1; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } + /* Set Mvdd Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) { + config = VR_SVI2_PLANE_2; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + 
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, smu_data->smu7_data.soft_regs_start + + offsetof(SMU74_SoftRegisters, AllowMvddSwitch), 0x1); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + } + + return 0; +} + + +static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + + SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); + int result = 0; + struct pp_atom_ctrl__avfs_parameters avfs_params = {0}; + AVFS_meanNsigma_t AVFS_meanNsigma = { {0} }; + AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} }; + uint32_t tmp, i; + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + + + if (((struct smu7_smumgr *)smu_data)->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + return result; + + result = atomctrl_get_avfs_information(hwmgr, &avfs_params); + + if (0 == result) { + table->BTCGB_VDROOP_TABLE[0].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); + table->BTCGB_VDROOP_TABLE[0].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); + table->BTCGB_VDROOP_TABLE[0].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2); + table->BTCGB_VDROOP_TABLE[1].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0); + table->BTCGB_VDROOP_TABLE[1].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1); + table->BTCGB_VDROOP_TABLE[1].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2); + table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1); + table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2); + table->AVFSGB_VDROOP_TABLE[0].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b); + table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24; + table->AVFSGB_VDROOP_TABLE[0].m2_shift = 12; + table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1); + table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2); + table->AVFSGB_VDROOP_TABLE[1].b = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b); + table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24; + table->AVFSGB_VDROOP_TABLE[1].m2_shift = 12; + table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv); + AVFS_meanNsigma.Aconstant[0] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0); + AVFS_meanNsigma.Aconstant[1] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1); + AVFS_meanNsigma.Aconstant[2] = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2); + AVFS_meanNsigma.DC_tol_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma); + AVFS_meanNsigma.Platform_mean = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean); + AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor); + AVFS_meanNsigma.Platform_sigma = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma); + + for (i = 0; i < NUM_VFT_COLUMNS; i++) { + AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625); + AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100); + } + + result = smu7_read_smc_sram_dword(hwmgr, + 
SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma), + &tmp, SMC_RAM_END); + + smu7_copy_bytes_to_smc(hwmgr, + tmp, + (uint8_t *)&AVFS_meanNsigma, + sizeof(AVFS_meanNsigma_t), + SMC_RAM_END); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable), + &tmp, SMC_RAM_END); + smu7_copy_bytes_to_smc(hwmgr, + tmp, + (uint8_t *)&AVFS_SclkOffset, + sizeof(AVFS_Sclk_Offset_t), + SMC_RAM_END); + + data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT); + data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false; + } + return result; +} + +static int polaris10_init_arb_table_index(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + uint32_t tmp; + int result; + + /* This is a read-modify-write on the first byte of the ARB table. + * The first byte in the SMU73_Discrete_MCArbDramTimingTable structure + * is the field 'current'. + * This solution is ugly, but we never write the whole table only + * individual fields in it. + * In reality this field should not be in that structure + * but in a soft register. + */ + result = smu7_read_smc_sram_dword(hwmgr, + smu_data->smu7_data.arb_table_start, &tmp, SMC_RAM_END); + + if (result) + return result; + + tmp &= 0x00FFFFFF; + tmp |= ((uint32_t)MC_CG_ARB_FREQ_F1) << 24; + + return smu7_write_smc_sram_dword(hwmgr, + smu_data->smu7_data.arb_table_start, tmp, SMC_RAM_END); +} + +static void polaris10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (table_info && + table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX && + table_info->cac_dtp_table->usPowerTuneDataSetID) + smu_data->power_tune_defaults = + &polaris10_power_tune_data_set_array + [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; + else + smu_data->power_tune_defaults = &polaris10_power_tune_data_set_array[0]; + +} + +static void polaris10_save_default_power_profile(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct SMU74_Discrete_GraphicsLevel *levels = + data->smc_state_table.GraphicsLevel; + unsigned min_level = 1; + + hwmgr->default_gfx_power_profile.activity_threshold = + be16_to_cpu(levels[0].ActivityLevel); + hwmgr->default_gfx_power_profile.up_hyst = levels[0].UpHyst; + hwmgr->default_gfx_power_profile.down_hyst = levels[0].DownHyst; + hwmgr->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; + + hwmgr->default_compute_power_profile = hwmgr->default_gfx_power_profile; + hwmgr->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; + + /* Workaround compute SDMA instability: disable lowest SCLK + * DPM level. 
Optimize compute power profile: Use only highest + * 2 power levels (if more than 2 are available), Hysteresis: + * 0ms up, 5ms down + */ + if (data->smc_state_table.GraphicsDpmLevelCount > 2) + min_level = data->smc_state_table.GraphicsDpmLevelCount - 2; + else if (data->smc_state_table.GraphicsDpmLevelCount == 2) + min_level = 1; + else + min_level = 0; + hwmgr->default_compute_power_profile.min_sclk = + be32_to_cpu(levels[min_level].SclkSetting.SclkFrequency); + hwmgr->default_compute_power_profile.up_hyst = 0; + hwmgr->default_compute_power_profile.down_hyst = 5; + + hwmgr->gfx_power_profile = hwmgr->default_gfx_power_profile; + hwmgr->compute_power_profile = hwmgr->default_compute_power_profile; +} + +static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); + uint8_t i; + struct pp_atomctrl_gpio_pin_assignment gpio_pin; + pp_atomctrl_clock_dividers_vi dividers; + + polaris10_initialize_power_tune_defaults(hwmgr); + + if (SMU7_VOLTAGE_CONTROL_NONE != hw_data->voltage_control) + polaris10_populate_smc_voltage_tables(hwmgr, table); + + table->SystemFlags = 0; + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StepVddc)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (hw_data->is_memory_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + if (hw_data->ulv_supported && table_info->us_ulv_voltage_offset) { + result = polaris10_populate_ulv_state(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ULV state!", return result); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_ULV_PARAMETER, SMU7_CGULVPARAMETER_DFLT); + } + + result = polaris10_populate_smc_link_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Link Level!", return result); + + result = polaris10_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Graphics Level!", return result); + + result = polaris10_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Memory Level!", return result); + + result = polaris10_populate_smc_acpi_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ACPI Level!", return result); + + result = polaris10_populate_smc_vce_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize VCE Level!", return result); + + result = polaris10_populate_smc_samu_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize SAMU Level!", return result); + + /* Since only the initial state is completely set up at this point + * (the other states are just copies of the boot state) we only + * need to populate the ARB settings for the initial state. 
+ */ + result = polaris10_program_memory_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to Write ARB settings for the initial state.", return result); + + result = polaris10_populate_smc_uvd_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize UVD Level!", return result); + + result = polaris10_populate_smc_boot_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Boot Level!", return result); + + result = polaris10_populate_smc_initailial_state(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Boot State!", return result); + + result = polaris10_populate_bapm_parameters_in_dpm_table(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate BAPM Parameters!", return result); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher)) { + result = polaris10_populate_clock_stretcher_data_table(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate Clock Stretcher Data Table!", + return result); + } + + result = polaris10_populate_avfs_parameters(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;); + + table->CurrSclkPllRange = 0xff; + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = + table_info->cac_dtp_table->usTargetOperatingTemp * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->TemperatureLimitLow = + (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + table->PCIeBootLinkLevel = 0; + table->PCIeGenInterval = 1; + table->VRConfig = 0; + + result = polaris10_populate_vr_config(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate VRConfig setting!", return result); + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID, &gpio_pin)) { + table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift; + } else { + table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } + + if (atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID, + &gpio_pin)) { + table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } else { + table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } + + /* Thermal Output GPIO */ + if (atomctrl_get_pp_assign_pin(hwmgr, THERMAL_INT_OUTPUT_GPIO_PINID, + &gpio_pin)) { + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalOutGPIO); + + table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift; + + /* For porlarity read GPIOPAD_A with assigned Gpio pin + * since VBIOS will program this register to set 'inactive state', + * driver can then determine 'active state' from this and + * program SMU with correct polarity + */ + table->ThermOutPolarity = (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) + & (1 << gpio_pin.uc_gpio_pin_bit_shift))) ? 
1:0; + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY; + + /* if required, combine VRHot/PCC with thermal out GPIO */ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_RegulatorHot) + && phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_CombinePCCWithThermalSignal)) + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT; + } else { + table->ThermOutGpio = 17; + table->ThermOutPolarity = 1; + table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE; + } + + /* Populate BIF_SCLK levels into SMC DPM table */ + for (i = 0; i <= hw_data->dpm_table.pcie_speed_table.count; i++) { + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, smu_data->bif_sclk_table[i], &dividers); + PP_ASSERT_WITH_CODE((result == 0), "Can not find DFS divide id for Sclk", return result); + + if (i == 0) + table->Ulv.BifSclkDfs = PP_HOST_TO_SMC_US((USHORT)(dividers.pll_post_divider)); + else + table->LinkLevel[i-1].BifSclkDfs = PP_HOST_TO_SMC_US((USHORT)(dividers.pll_post_divider)); + } + + for (i = 0; i < SMU74_MAX_ENTRIES_SMIO; i++) + table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); + + CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); + CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2); + CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); + CONVERT_FROM_HOST_TO_SMC_UL(table->CurrSclkPllRange); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); + CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); + CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); + + /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ + result = smu7_copy_bytes_to_smc(hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU74_Discrete_DpmTable, SystemFlags), + (uint8_t *)&(table->SystemFlags), + sizeof(SMU74_Discrete_DpmTable) - 3 * sizeof(SMU74_PIDController), + SMC_RAM_END); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to upload dpm data to SMC memory!", return result); + + result = polaris10_init_arb_table_index(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to upload arb data to SMC memory!", return result); + + result = polaris10_populate_pm_fuses(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate PM fuses to SMC memory!", return result); + + polaris10_save_default_power_profile(hwmgr); + + return 0; +} + +static int polaris10_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) + return polaris10_program_memory_timing_parameters(hwmgr); + + return 0; +} + +int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr) +{ + int ret; + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + return 0; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting); + + ret = (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs) == 0) ?
+ 0 : -1; + + if (!ret) + /* If this param is not changed, this function could fire unnecessarily */ + smu_data->avfs.avfs_btc_status = AVFS_BTC_COMPLETED_PREVIOUSLY; + + return ret; +} + +static int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + SMU74_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; + uint32_t duty100; + uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; + uint16_t fdo_min, slope1, slope2; + uint32_t reference_clock; + int res; + uint64_t tmp64; + + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + if (smu_data->smu7_data.fan_table_start == 0) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + CG_FDO_CTRL1, FMAX_DUTY100); + + if (duty100 == 0) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + tmp64 = hwmgr->thermal_controller.advanceFanControlParameters. + usPWMMin * duty100; + do_div(tmp64, 10000); + fdo_min = (uint16_t)tmp64; + + t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - + hwmgr->thermal_controller.advanceFanControlParameters.usTMin; + t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - + hwmgr->thermal_controller.advanceFanControlParameters.usTMed; + + pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; + pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; + + slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); + slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); + + fan_table.TempMin = cpu_to_be16((50 + hwmgr-> + thermal_controller.advanceFanControlParameters.usTMin) / 100); + fan_table.TempMed = cpu_to_be16((50 + hwmgr-> + thermal_controller.advanceFanControlParameters.usTMed) / 100); + fan_table.TempMax = cpu_to_be16((50 + hwmgr-> + thermal_controller.advanceFanControlParameters.usTMax) / 100); + + fan_table.Slope1 = cpu_to_be16(slope1); + fan_table.Slope2 = cpu_to_be16(slope2); + + fan_table.FdoMin = cpu_to_be16(fdo_min); + + fan_table.HystDown = cpu_to_be16(hwmgr-> + thermal_controller.advanceFanControlParameters.ucTHyst); + + fan_table.HystUp = cpu_to_be16(1); + + fan_table.HystSlope = cpu_to_be16(1); + + fan_table.TempRespLim = cpu_to_be16(5); + + reference_clock = smu7_get_xclk(hwmgr); + + fan_table.RefreshPeriod = cpu_to_be32((hwmgr-> + thermal_controller.advanceFanControlParameters.ulCycleDelay * + reference_clock) / 1600); + + fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); + + fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD( + hwmgr->device, CGS_IND_REG__SMC, + CG_MULT_THERMAL_CTRL, TEMP_SEL); + + res = smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.fan_table_start, + (uint8_t *)&fan_table, (uint32_t)sizeof(fan_table), + SMC_RAM_END); + + if (!res && hwmgr->thermal_controller. + advanceFanControlParameters.ucMinimumPWMLimit) + res = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetFanMinPwm, + hwmgr->thermal_controller. + advanceFanControlParameters.ucMinimumPWMLimit); + + if (!res && hwmgr->thermal_controller. 
+ advanceFanControlParameters.ulMinFanSCLKAcousticLimit) + res = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetFanSclkTarget, + hwmgr->thermal_controller. + advanceFanControlParameters.ulMinFanSCLKAcousticLimit); + + if (res) + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + + return 0; +} + +static int polaris10_update_uvd_smc_table(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + smu_data->smc_state_table.UvdBootLevel = 0; + if (table_info->mm_dep_table->count > 0) + smu_data->smc_state_table.UvdBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU74_Discrete_DpmTable, + UvdBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0x00FFFFFF; + mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UVDDPM) || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_UVDDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); + return 0; +} + +static int polaris10_update_vce_smc_table(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smu_data->smc_state_table.VceBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + else + smu_data->smc_state_table.VceBootLevel = 0; + + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU74_Discrete_DpmTable, VceBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFF00FFFF; + mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_VCEDPM_SetEnabledMask, + (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); + return 0; +} + +static int polaris10_update_samu_smc_table(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + + + smu_data->smc_state_table.SamuBootLevel = 0; + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU74_Discrete_DpmTable, SamuBootLevel); + + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 
0xFFFFFF00; + mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); + return 0; +} + + +static int polaris10_update_bif_smc_table(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; + int max_entry, i; + + max_entry = (SMU74_MAX_LEVELS_LINK < pcie_table->count) ? + SMU74_MAX_LEVELS_LINK : + pcie_table->count; + /* Setup BIF_SCLK levels */ + for (i = 0; i < max_entry; i++) + smu_data->bif_sclk_table[i] = pcie_table->entries[i].pcie_sclk; + return 0; +} + +static int polaris10_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) +{ + switch (type) { + case SMU_UVD_TABLE: + polaris10_update_uvd_smc_table(hwmgr); + break; + case SMU_VCE_TABLE: + polaris10_update_vce_smc_table(hwmgr); + break; + case SMU_SAMU_TABLE: + polaris10_update_samu_smc_table(hwmgr); + break; + case SMU_BIF_TABLE: + polaris10_update_bif_smc_table(hwmgr); + default: + break; + } + return 0; +} + +static int polaris10_update_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + + int result = 0; + uint32_t low_sclk_interrupt_threshold = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification) + && (hwmgr->gfx_arbiter.sclk_threshold != + data->low_sclk_interrupt_threshold)) { + data->low_sclk_interrupt_threshold = + hwmgr->gfx_arbiter.sclk_threshold; + low_sclk_interrupt_threshold = + data->low_sclk_interrupt_threshold; + + CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU74_Discrete_DpmTable, + LowSclkInterruptThreshold), + (uint8_t *)&low_sclk_interrupt_threshold, + sizeof(uint32_t), + SMC_RAM_END); + } + PP_ASSERT_WITH_CODE((result == 0), + "Failed to update SCLK threshold!", return result); + + result = polaris10_program_mem_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE((result == 0), + "Failed to program memory timing parameters!", + ); + + return result; +} + +static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member) +{ + switch (type) { + case SMU_SoftRegisters: + switch (member) { + case HandshakeDisables: + return offsetof(SMU74_SoftRegisters, HandshakeDisables); + case VoltageChangeTimeout: + return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout); + case AverageGraphicsActivity: + return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity); + case PreVBlankGap: + return offsetof(SMU74_SoftRegisters, PreVBlankGap); + case VBlankTimeout: + return offsetof(SMU74_SoftRegisters, VBlankTimeout); + case UcodeLoadStatus: + return offsetof(SMU74_SoftRegisters, UcodeLoadStatus); + case DRAM_LOG_ADDR_H: + return offsetof(SMU74_SoftRegisters, DRAM_LOG_ADDR_H); + case DRAM_LOG_ADDR_L: + return offsetof(SMU74_SoftRegisters, DRAM_LOG_ADDR_L); + case DRAM_LOG_PHY_ADDR_H: + return offsetof(SMU74_SoftRegisters, 
DRAM_LOG_PHY_ADDR_H); + case DRAM_LOG_PHY_ADDR_L: + return offsetof(SMU74_SoftRegisters, DRAM_LOG_PHY_ADDR_L); + case DRAM_LOG_BUFF_SIZE: + return offsetof(SMU74_SoftRegisters, DRAM_LOG_BUFF_SIZE); + } + case SMU_Discrete_DpmTable: + switch (member) { + case UvdBootLevel: + return offsetof(SMU74_Discrete_DpmTable, UvdBootLevel); + case VceBootLevel: + return offsetof(SMU74_Discrete_DpmTable, VceBootLevel); + case SamuBootLevel: + return offsetof(SMU74_Discrete_DpmTable, SamuBootLevel); + case LowSclkInterruptThreshold: + return offsetof(SMU74_Discrete_DpmTable, LowSclkInterruptThreshold); + } + } + pr_warn("can't get the offset of type %x member %x\n", type, member); + return 0; +} + +static uint32_t polaris10_get_mac_definition(uint32_t value) +{ + switch (value) { + case SMU_MAX_LEVELS_GRAPHICS: + return SMU74_MAX_LEVELS_GRAPHICS; + case SMU_MAX_LEVELS_MEMORY: + return SMU74_MAX_LEVELS_MEMORY; + case SMU_MAX_LEVELS_LINK: + return SMU74_MAX_LEVELS_LINK; + case SMU_MAX_ENTRIES_SMIO: + return SMU74_MAX_ENTRIES_SMIO; + case SMU_MAX_LEVELS_VDDC: + return SMU74_MAX_LEVELS_VDDC; + case SMU_MAX_LEVELS_VDDGFX: + return SMU74_MAX_LEVELS_VDDGFX; + case SMU_MAX_LEVELS_VDDCI: + return SMU74_MAX_LEVELS_VDDCI; + case SMU_MAX_LEVELS_MVDD: + return SMU74_MAX_LEVELS_MVDD; + case SMU_UVD_MCLK_HANDSHAKE_DISABLE: + return SMU7_UVD_MCLK_HANDSHAKE_DISABLE; + } + + pr_warn("can't get the mac of %x\n", value); + return 0; +} + +static int polaris10_process_firmware_header(struct pp_hwmgr *hwmgr) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t tmp; + int result; + bool error = false; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU74_Firmware_Header, DpmTable), + &tmp, SMC_RAM_END); + + if (0 == result) + smu_data->smu7_data.dpm_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU74_Firmware_Header, SoftRegisters), + &tmp, SMC_RAM_END); + + if (!result) { + data->soft_regs_start = tmp; + smu_data->smu7_data.soft_regs_start = tmp; + } + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU74_Firmware_Header, mcRegisterTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.mc_reg_table_start = tmp; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU74_Firmware_Header, FanTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.fan_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU74_Firmware_Header, mcArbDramTimingTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.arb_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU74_Firmware_Header, Version), + &tmp, SMC_RAM_END); + + if (!result) + hwmgr->microcode_version_info.SMC = tmp; + + error |= (0 != result); + + return error ? -1 : 0; +} + +static bool polaris10_is_dpm_running(struct pp_hwmgr *hwmgr) +{ + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) + ? 
true : false; +} + +static int polaris10_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, + struct amd_pp_profile *request) +{ + struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *) + (hwmgr->smu_backend); + struct SMU74_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU74_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU74_Discrete_GraphicsLevel) * + SMU74_MAX_LEVELS_GRAPHICS; + uint32_t i; + + for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { + levels[i].ActivityLevel = + cpu_to_be16(request->activity_threshold); + levels[i].EnabledForActivity = 1; + levels[i].UpHyst = request->up_hyst; + levels[i].DownHyst = request->down_hyst; + } + + return smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + array_size, SMC_RAM_END); +} + const struct pp_smumgr_func polaris10_smu_funcs = { .smu_init = polaris10_smu_init, .smu_fini = smu7_smu_fini, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c index c997117f2461..7f5359a97ef2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c @@ -25,12 +25,13 @@ #include "pp_debug.h" #include "smumgr.h" #include "smu_ucode_xfer_vi.h" -#include "smu/smu_7_1_3_d.h" -#include "smu/smu_7_1_3_sh_mask.h" #include "ppatomctrl.h" #include "cgs_common.h" #include "smu7_ppsmc.h" #include "smu7_smumgr.h" +#include "smu7_common.h" + +#include "polaris10_pwrvirus.h" #define SMU7_SMC_SIZE 0x20000 @@ -540,6 +541,47 @@ int smu7_upload_smu_firmware_image(struct pp_hwmgr *hwmgr) return result; } +static void execute_pwr_table(struct pp_hwmgr *hwmgr, const PWR_Command_Table *pvirus, int size) +{ + int i; + uint32_t reg, data; + + for (i = 0; i < size; i++) { + reg = pvirus->reg; + data = pvirus->data; + if (reg != 0xffffffff) + cgs_write_register(hwmgr->device, reg, data); + else + break; + pvirus++; + } +} + +static void execute_pwr_dfy_table(struct pp_hwmgr *hwmgr, const PWR_DFY_Section *section) +{ + int i; + + cgs_write_register(hwmgr->device, mmCP_DFY_CNTL, section->dfy_cntl); + cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_HI, section->dfy_addr_hi); + cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_LO, section->dfy_addr_lo); + for (i = 0; i < section->dfy_size; i++) + cgs_write_register(hwmgr->device, mmCP_DFY_DATA_0, section->dfy_data[i]); +} + +int smu7_setup_pwr_virus(struct pp_hwmgr *hwmgr) +{ + execute_pwr_table(hwmgr, pwr_virus_table_pre, ARRAY_SIZE(pwr_virus_table_pre)); + execute_pwr_dfy_table(hwmgr, &pwr_virus_section1); + execute_pwr_dfy_table(hwmgr, &pwr_virus_section2); + execute_pwr_dfy_table(hwmgr, &pwr_virus_section3); + execute_pwr_dfy_table(hwmgr, &pwr_virus_section4); + execute_pwr_dfy_table(hwmgr, &pwr_virus_section5); + execute_pwr_dfy_table(hwmgr, &pwr_virus_section6); + execute_pwr_table(hwmgr, pwr_virus_table_post, ARRAY_SIZE(pwr_virus_table_post)); + + return 0; +} + int smu7_init(struct pp_hwmgr *hwmgr) { struct smu7_smumgr *smu_data; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h index 0b63c5c1043c..c87263bc0caa 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h @@ -88,4 +88,6 @@ int smu7_upload_smu_firmware_image(struct pp_hwmgr *hwmgr); int smu7_init(struct pp_hwmgr *hwmgr); int smu7_smu_fini(struct pp_hwmgr 
*hwmgr); -#endif
\ No newline at end of file +int smu7_setup_pwr_virus(struct pp_hwmgr *hwmgr); + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c deleted file mode 100644 index 1f720ccdaf99..000000000000 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c +++ /dev/null @@ -1,3261 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * - */ - -#include "pp_debug.h" -#include "tonga_smc.h" -#include "smu7_dyn_defaults.h" - -#include "smu7_hwmgr.h" -#include "hardwaremanager.h" -#include "ppatomctrl.h" -#include "cgs_common.h" -#include "atombios.h" -#include "tonga_smumgr.h" -#include "pppcielanes.h" -#include "pp_endian.h" -#include "smu7_ppsmc.h" - -#include "smu72_discrete.h" - -#include "smu/smu_7_1_2_d.h" -#include "smu/smu_7_1_2_sh_mask.h" - -#include "gmc/gmc_8_1_d.h" -#include "gmc/gmc_8_1_sh_mask.h" - -#include "bif/bif_5_0_d.h" -#include "bif/bif_5_0_sh_mask.h" - -#include "dce/dce_10_0_d.h" -#include "dce/dce_10_0_sh_mask.h" - - -#define VOLTAGE_SCALE 4 -#define POWERTUNE_DEFAULT_SET_MAX 1 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 -#define MC_CG_ARB_FREQ_F1 0x0b -#define VDDC_VDDCI_DELTA 200 - - -static const struct tonga_pt_defaults tonga_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { -/* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, - * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT - */ - {1, 0xF, 0xFD, 0x19, - 5, 45, 0, 0xB0000, - {0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, - 0xC9, 0xC9, 0x2F, 0x4D, 0x61}, - {0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, - 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4} - }, -}; - -/* [Fmin, Fmax, LDO_REFSEL, USE_FOR_LOW_FREQ] */ -static const uint16_t tonga_clock_stretcher_lookup_table[2][4] = { - {600, 1050, 3, 0}, - {600, 1050, 6, 1} -}; - -/* [FF, SS] type, [] 4 voltage ranges, - * and [Floor Freq, Boundary Freq, VID min , VID max] - */ -static const uint32_t tonga_clock_stretcher_ddt_table[2][4][4] = { - { {265, 529, 120, 128}, {325, 650, 96, 119}, {430, 860, 32, 95}, {0, 0, 0, 31} }, - { {275, 550, 104, 112}, {319, 638, 96, 103}, {360, 720, 64, 95}, {384, 768, 32, 63} } -}; - -/* [Use_For_Low_freq] value, [0%, 5%, 10%, 7.14%, 14.28%, 20%] */ -static const uint8_t tonga_clock_stretch_amount_conversion[2][6] = { - {0, 1, 3, 2, 4, 5}, - {0, 2, 4, 5, 6, 5} -}; - -/* PPGen 
has the gain setting generated in x * 100 unit - * This function is to convert the unit to x * 4096(0x1000) unit. - * This is the unit expected by SMC firmware - */ - - -static int tonga_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, - phm_ppt_v1_clock_voltage_dependency_table *allowed_clock_voltage_table, - uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd) -{ - uint32_t i = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - /* clock - voltage dependency table is empty table */ - if (allowed_clock_voltage_table->count == 0) - return -EINVAL; - - for (i = 0; i < allowed_clock_voltage_table->count; i++) { - /* find first sclk bigger than request */ - if (allowed_clock_voltage_table->entries[i].clk >= clock) { - voltage->VddGfx = phm_get_voltage_index( - pptable_info->vddgfx_lookup_table, - allowed_clock_voltage_table->entries[i].vddgfx); - voltage->Vddc = phm_get_voltage_index( - pptable_info->vddc_lookup_table, - allowed_clock_voltage_table->entries[i].vddc); - - if (allowed_clock_voltage_table->entries[i].vddci) - voltage->Vddci = - phm_get_voltage_id(&data->vddci_voltage_table, allowed_clock_voltage_table->entries[i].vddci); - else - voltage->Vddci = - phm_get_voltage_id(&data->vddci_voltage_table, - allowed_clock_voltage_table->entries[i].vddc - VDDC_VDDCI_DELTA); - - - if (allowed_clock_voltage_table->entries[i].mvdd) - *mvdd = (uint32_t) allowed_clock_voltage_table->entries[i].mvdd; - - voltage->Phases = 1; - return 0; - } - } - - /* sclk is bigger than max sclk in the dependence table */ - voltage->VddGfx = phm_get_voltage_index(pptable_info->vddgfx_lookup_table, - allowed_clock_voltage_table->entries[i-1].vddgfx); - voltage->Vddc = phm_get_voltage_index(pptable_info->vddc_lookup_table, - allowed_clock_voltage_table->entries[i-1].vddc); - - if (allowed_clock_voltage_table->entries[i-1].vddci) - voltage->Vddci = phm_get_voltage_id(&data->vddci_voltage_table, - allowed_clock_voltage_table->entries[i-1].vddci); - - if (allowed_clock_voltage_table->entries[i-1].mvdd) - *mvdd = (uint32_t) allowed_clock_voltage_table->entries[i-1].mvdd; - - return 0; -} - - -/** - * Vddc table preparation for SMC. - * - * @param hwmgr the address of the hardware manager - * @param table the SMC DPM table structure to be populated - * @return always 0 - */ -static int tonga_populate_smc_vddc_table(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - unsigned int count; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { - table->VddcLevelCount = data->vddc_voltage_table.count; - for (count = 0; count < table->VddcLevelCount; count++) { - table->VddcTable[count] = - PP_HOST_TO_SMC_US(data->vddc_voltage_table.entries[count].value * VOLTAGE_SCALE); - } - CONVERT_FROM_HOST_TO_SMC_UL(table->VddcLevelCount); - } - return 0; -} - -/** - * VddGfx table preparation for SMC. 
- * - * @param hwmgr the address of the hardware manager - * @param table the SMC DPM table structure to be populated - * @return always 0 - */ -static int tonga_populate_smc_vdd_gfx_table(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - unsigned int count; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vdd_gfx_control) { - table->VddGfxLevelCount = data->vddgfx_voltage_table.count; - for (count = 0; count < data->vddgfx_voltage_table.count; count++) { - table->VddGfxTable[count] = - PP_HOST_TO_SMC_US(data->vddgfx_voltage_table.entries[count].value * VOLTAGE_SCALE); - } - CONVERT_FROM_HOST_TO_SMC_UL(table->VddGfxLevelCount); - } - return 0; -} - -/** - * Vddci table preparation for SMC. - * - * @param *hwmgr The address of the hardware manager. - * @param *table The SMC DPM table structure to be populated. - * @return 0 - */ -static int tonga_populate_smc_vdd_ci_table(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t count; - - table->VddciLevelCount = data->vddci_voltage_table.count; - for (count = 0; count < table->VddciLevelCount; count++) { - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { - table->VddciTable[count] = - PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[count].value * VOLTAGE_SCALE); - } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { - table->SmioTable1.Pattern[count].Voltage = - PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[count].value * VOLTAGE_SCALE); - /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level. */ - table->SmioTable1.Pattern[count].Smio = - (uint8_t) count; - table->Smio[count] |= - data->vddci_voltage_table.entries[count].smio_low; - table->VddciTable[count] = - PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[count].value * VOLTAGE_SCALE); - } - } - - table->SmioMask1 = data->vddci_voltage_table.mask_low; - CONVERT_FROM_HOST_TO_SMC_UL(table->VddciLevelCount); - - return 0; -} - -/** - * Mvdd table preparation for SMC. - * - * @param *hwmgr The address of the hardware manager. - * @param *table The SMC DPM table structure to be populated. - * @return 0 - */ -static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t count; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { - table->MvddLevelCount = data->mvdd_voltage_table.count; - for (count = 0; count < table->MvddLevelCount; count++) { - table->SmioTable2.Pattern[count].Voltage = - PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); - /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ - table->SmioTable2.Pattern[count].Smio = - (uint8_t) count; - table->Smio[count] |= - data->mvdd_voltage_table.entries[count].smio_low; - } - table->SmioMask2 = data->mvdd_voltage_table.mask_low; - - CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); - } - - return 0; -} - -/** - * Preparation of vddc and vddgfx CAC tables for SMC. 
- * - * @param hwmgr the address of the hardware manager - * @param table the SMC DPM table structure to be populated - * @return always 0 - */ -static int tonga_populate_cac_tables(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - uint32_t count; - uint8_t index = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_voltage_lookup_table *vddgfx_lookup_table = - pptable_info->vddgfx_lookup_table; - struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table = - pptable_info->vddc_lookup_table; - - /* table is already swapped, so in order to use the value from it - * we need to swap it back. - */ - uint32_t vddc_level_count = PP_SMC_TO_HOST_UL(table->VddcLevelCount); - uint32_t vddgfx_level_count = PP_SMC_TO_HOST_UL(table->VddGfxLevelCount); - - for (count = 0; count < vddc_level_count; count++) { - /* We are populating vddc CAC data to BapmVddc table in split and merged mode */ - index = phm_get_voltage_index(vddc_lookup_table, - data->vddc_voltage_table.entries[count].value); - table->BapmVddcVidLoSidd[count] = - convert_to_vid(vddc_lookup_table->entries[index].us_cac_low); - table->BapmVddcVidHiSidd[count] = - convert_to_vid(vddc_lookup_table->entries[index].us_cac_mid); - table->BapmVddcVidHiSidd2[count] = - convert_to_vid(vddc_lookup_table->entries[index].us_cac_high); - } - - if ((data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2)) { - /* We are populating vddgfx CAC data to BapmVddgfx table in split mode */ - for (count = 0; count < vddgfx_level_count; count++) { - index = phm_get_voltage_index(vddgfx_lookup_table, - convert_to_vid(vddgfx_lookup_table->entries[index].us_cac_mid)); - table->BapmVddGfxVidHiSidd2[count] = - convert_to_vid(vddgfx_lookup_table->entries[index].us_cac_high); - } - } else { - for (count = 0; count < vddc_level_count; count++) { - index = phm_get_voltage_index(vddc_lookup_table, - data->vddc_voltage_table.entries[count].value); - table->BapmVddGfxVidLoSidd[count] = - convert_to_vid(vddc_lookup_table->entries[index].us_cac_low); - table->BapmVddGfxVidHiSidd[count] = - convert_to_vid(vddc_lookup_table->entries[index].us_cac_mid); - table->BapmVddGfxVidHiSidd2[count] = - convert_to_vid(vddc_lookup_table->entries[index].us_cac_high); - } - } - - return 0; -} - -/** - * Preparation of voltage tables for SMC. 
- * - * @param hwmgr the address of the hardware manager - * @param table the SMC DPM table structure to be populated - * @return always 0 - */ - -static int tonga_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result; - - result = tonga_populate_smc_vddc_table(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "can not populate VDDC voltage table to SMC", - return -EINVAL); - - result = tonga_populate_smc_vdd_ci_table(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "can not populate VDDCI voltage table to SMC", - return -EINVAL); - - result = tonga_populate_smc_vdd_gfx_table(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "can not populate VDDGFX voltage table to SMC", - return -EINVAL); - - result = tonga_populate_smc_mvdd_table(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "can not populate MVDD voltage table to SMC", - return -EINVAL); - - result = tonga_populate_cac_tables(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "can not populate CAC voltage tables to SMC", - return -EINVAL); - - return 0; -} - -static int tonga_populate_ulv_level(struct pp_hwmgr *hwmgr, - struct SMU72_Discrete_Ulv *state) -{ - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - state->CcPwrDynRm = 0; - state->CcPwrDynRm1 = 0; - - state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; - state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * - VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); - - state->VddcPhase = 1; - - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); - CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); - - return 0; -} - -static int tonga_populate_ulv_state(struct pp_hwmgr *hwmgr, - struct SMU72_Discrete_DpmTable *table) -{ - return tonga_populate_ulv_level(hwmgr, &table->Ulv); -} - -static int tonga_populate_smc_link_level(struct pp_hwmgr *hwmgr, SMU72_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t i; - - /* Index (dpm_table->pcie_speed_table.count) is reserved for PCIE boot level. 
*/ - for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { - table->LinkLevel[i].PcieGenSpeed = - (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; - table->LinkLevel[i].PcieLaneCount = - (uint8_t)encode_pcie_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); - table->LinkLevel[i].EnabledForActivity = - 1; - table->LinkLevel[i].SPC = - (uint8_t)(data->pcie_spc_cap & 0xff); - table->LinkLevel[i].DownThreshold = - PP_HOST_TO_SMC_UL(5); - table->LinkLevel[i].UpThreshold = - PP_HOST_TO_SMC_UL(30); - } - - smu_data->smc_state_table.LinkLevelCount = - (uint8_t)dpm_table->pcie_speed_table.count; - data->dpm_level_enable_mask.pcie_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); - - return 0; -} - -/** - * Calculates the SCLK dividers using the provided engine clock - * - * @param hwmgr the address of the hardware manager - * @param engine_clock the engine clock to use to populate the structure - * @param sclk the SMC SCLK structure to be populated - */ -static int tonga_calculate_sclk_params(struct pp_hwmgr *hwmgr, - uint32_t engine_clock, SMU72_Discrete_GraphicsLevel *sclk) -{ - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - pp_atomctrl_clock_dividers_vi dividers; - uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; - uint32_t spll_func_cntl_3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; - uint32_t spll_func_cntl_4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; - uint32_t cg_spll_spread_spectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; - uint32_t cg_spll_spread_spectrum_2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; - uint32_t reference_clock; - uint32_t reference_divider; - uint32_t fbdiv; - int result; - - /* get the engine clock dividers for this clock value*/ - result = atomctrl_get_engine_pll_dividers_vi(hwmgr, engine_clock, &dividers); - - PP_ASSERT_WITH_CODE(result == 0, - "Error retrieving Engine Clock dividers from VBIOS.", return result); - - /* To get FBDIV we need to multiply this by 16384 and divide it by Fref.*/ - reference_clock = atomctrl_get_reference_clock(hwmgr); - - reference_divider = 1 + dividers.uc_pll_ref_div; - - /* low 14 bits is fraction and high 12 bits is divider*/ - fbdiv = dividers.ul_fb_div.ul_fb_divider & 0x3FFFFFF; - - /* SPLL_FUNC_CNTL setup*/ - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, - CG_SPLL_FUNC_CNTL, SPLL_REF_DIV, dividers.uc_pll_ref_div); - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, - CG_SPLL_FUNC_CNTL, SPLL_PDIV_A, dividers.uc_pll_post_div); - - /* SPLL_FUNC_CNTL_3 setup*/ - spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, - CG_SPLL_FUNC_CNTL_3, SPLL_FB_DIV, fbdiv); - - /* set to use fractional accumulation*/ - spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, - CG_SPLL_FUNC_CNTL_3, SPLL_DITHEN, 1); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_EngineSpreadSpectrumSupport)) { - pp_atomctrl_internal_ss_info ss_info; - - uint32_t vcoFreq = engine_clock * dividers.uc_pll_post_div; - if (0 == atomctrl_get_engine_clock_spread_spectrum(hwmgr, vcoFreq, &ss_info)) { - /* - * ss_info.speed_spectrum_percentage -- in unit of 0.01% - * ss_info.speed_spectrum_rate -- in unit of khz - */ - /* clks = reference_clock * 10 / (REFDIV + 1) / speed_spectrum_rate / 2 */ - uint32_t clkS = reference_clock * 5 / (reference_divider * ss_info.speed_spectrum_rate); - - /* clkv = 2 * D * fbdiv / NS */ - uint32_t clkV = 4 * ss_info.speed_spectrum_percentage * fbdiv / (clkS * 10000); - - cg_spll_spread_spectrum = -
PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, CLKS, clkS); - cg_spll_spread_spectrum = - PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, SSEN, 1); - cg_spll_spread_spectrum_2 = - PHM_SET_FIELD(cg_spll_spread_spectrum_2, CG_SPLL_SPREAD_SPECTRUM_2, CLKV, clkV); - } - } - - sclk->SclkFrequency = engine_clock; - sclk->CgSpllFuncCntl3 = spll_func_cntl_3; - sclk->CgSpllFuncCntl4 = spll_func_cntl_4; - sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; - sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; - sclk->SclkDid = (uint8_t)dividers.pll_post_divider; - - return 0; -} - -/** - * Populates single SMC SCLK structure using the provided engine clock - * - * @param hwmgr the address of the hardware manager - * @param engine_clock the engine clock to use to populate the structure - * @param sclk the SMC SCLK structure to be populated - */ -static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, - uint32_t engine_clock, - uint16_t sclk_activity_level_threshold, - SMU72_Discrete_GraphicsLevel *graphic_level) -{ - int result; - uint32_t mvdd; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - result = tonga_calculate_sclk_params(hwmgr, engine_clock, graphic_level); - - /* populate graphics levels*/ - result = tonga_get_dependency_volt_by_clk(hwmgr, - pptable_info->vdd_dep_on_sclk, engine_clock, - &graphic_level->MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((!result), - "can not find VDDC voltage value for VDDC " - "engine clock dependency table", return result); - - /* SCLK frequency in units of 10KHz*/ - graphic_level->SclkFrequency = engine_clock; - /* Indicates maximum activity level for this performance level. 
50% for now*/ - graphic_level->ActivityLevel = sclk_activity_level_threshold; - - graphic_level->CcPwrDynRm = 0; - graphic_level->CcPwrDynRm1 = 0; - /* this level can be used if activity is high enough.*/ - graphic_level->EnabledForActivity = 0; - /* this level can be used for throttling.*/ - graphic_level->EnabledForThrottle = 1; - graphic_level->UpHyst = 0; - graphic_level->DownHyst = 0; - graphic_level->VoltageDownHyst = 0; - graphic_level->PowerThrottle = 0; - - data->display_timing.min_clock_in_sr = - hwmgr->display_config.min_core_set_clock_in_sr; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkDeepSleep)) - graphic_level->DeepSleepDivId = - smu7_get_sleep_divider_id_from_clock(engine_clock, - data->display_timing.min_clock_in_sr); - - /* Default to slow, highest DPM level will be set to PPSMC_DISPLAY_WATERMARK_LOW later.*/ - graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - if (!result) { - /* CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVoltage);*/ - /* CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVddcPhases);*/ - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_US(graphic_level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl3); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl4); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum2); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm1); - } - - return result; -} - -/** - * Populates all SMC SCLK levels' structure based on the trimmed allowed dpm engine clock states - * - * @param hwmgr the address of the hardware manager - */ -int tonga_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - struct phm_ppt_v1_pcie_table *pcie_table = pptable_info->pcie_table; - uint8_t pcie_entry_count = (uint8_t) data->dpm_table.pcie_speed_table.count; - uint32_t level_array_address = smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, GraphicsLevel); - - uint32_t level_array_size = sizeof(SMU72_Discrete_GraphicsLevel) * - SMU72_MAX_LEVELS_GRAPHICS; - - SMU72_Discrete_GraphicsLevel *levels = smu_data->smc_state_table.GraphicsLevel; - - uint32_t i, max_entry; - uint8_t highest_pcie_level_enabled = 0; - uint8_t lowest_pcie_level_enabled = 0, mid_pcie_level_enabled = 0; - uint8_t count = 0; - int result = 0; - - memset(levels, 0x00, level_array_size); - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - result = tonga_populate_single_graphic_level(hwmgr, - dpm_table->sclk_table.dpm_levels[i].value, - (uint16_t)smu_data->activity_target[i], - &(smu_data->smc_state_table.GraphicsLevel[i])); - if (result != 0) - return result; - - /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ - if (i > 1) - smu_data->smc_state_table.GraphicsLevel[i].DeepSleepDivId = 0; - } - - /* Only enable level 0 for now. 
*/ - smu_data->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1; - - /* set highest level watermark to high */ - if (dpm_table->sclk_table.count > 1) - smu_data->smc_state_table.GraphicsLevel[dpm_table->sclk_table.count-1].DisplayWatermark = - PPSMC_DISPLAY_WATERMARK_HIGH; - - smu_data->smc_state_table.GraphicsDpmLevelCount = - (uint8_t)dpm_table->sclk_table.count; - data->dpm_level_enable_mask.sclk_dpm_enable_mask = - phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); - - if (pcie_table != NULL) { - PP_ASSERT_WITH_CODE((pcie_entry_count >= 1), - "There must be 1 or more PCIE levels defined in PPTable.", - return -EINVAL); - max_entry = pcie_entry_count - 1; /* for indexing, we need to decrement by 1.*/ - for (i = 0; i < dpm_table->sclk_table.count; i++) { - smu_data->smc_state_table.GraphicsLevel[i].pcieDpmLevel = - (uint8_t) ((i < max_entry) ? i : max_entry); - } - } else { - if (0 == data->dpm_level_enable_mask.pcie_dpm_enable_mask) - pr_err("Pcie Dpm Enablemask is 0 !"); - - while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && - ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1<<(highest_pcie_level_enabled+1))) != 0)) { - highest_pcie_level_enabled++; - } - - while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && - ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1<<lowest_pcie_level_enabled)) == 0)) { - lowest_pcie_level_enabled++; - } - - while ((count < highest_pcie_level_enabled) && - ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & - (1<<(lowest_pcie_level_enabled+1+count))) == 0)) { - count++; - } - mid_pcie_level_enabled = (lowest_pcie_level_enabled+1+count) < highest_pcie_level_enabled ? - (lowest_pcie_level_enabled+1+count) : highest_pcie_level_enabled; - - - /* set pcieDpmLevel to highest_pcie_level_enabled*/ - for (i = 2; i < dpm_table->sclk_table.count; i++) - smu_data->smc_state_table.GraphicsLevel[i].pcieDpmLevel = highest_pcie_level_enabled; - - /* set pcieDpmLevel to lowest_pcie_level_enabled*/ - smu_data->smc_state_table.GraphicsLevel[0].pcieDpmLevel = lowest_pcie_level_enabled; - - /* set pcieDpmLevel to mid_pcie_level_enabled*/ - smu_data->smc_state_table.GraphicsLevel[1].pcieDpmLevel = mid_pcie_level_enabled; - } - /* level count will send to smc once at init smc table and never change*/ - result = smu7_copy_bytes_to_smc(hwmgr, level_array_address, - (uint8_t *)levels, (uint32_t)level_array_size, - SMC_RAM_END); - - return result; -} - -/** - * Populates the SMC MCLK structure using the provided memory clock - * - * @param hwmgr the address of the hardware manager - * @param memory_clock the memory clock to use to populate the structure - * @param sclk the SMC SCLK structure to be populated - */ -static int tonga_calculate_mclk_params( - struct pp_hwmgr *hwmgr, - uint32_t memory_clock, - SMU72_Discrete_MemoryLevel *mclk, - bool strobe_mode, - bool dllStateOn - ) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; - uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; - uint32_t mpll_ad_func_cntl = data->clock_registers.vMPLL_AD_FUNC_CNTL; - uint32_t mpll_dq_func_cntl = data->clock_registers.vMPLL_DQ_FUNC_CNTL; - uint32_t mpll_func_cntl = data->clock_registers.vMPLL_FUNC_CNTL; - uint32_t mpll_func_cntl_1 = data->clock_registers.vMPLL_FUNC_CNTL_1; - uint32_t mpll_func_cntl_2 = data->clock_registers.vMPLL_FUNC_CNTL_2; - uint32_t mpll_ss1 = data->clock_registers.vMPLL_SS1; - uint32_t mpll_ss2 = data->clock_registers.vMPLL_SS2; - - 
pp_atomctrl_memory_clock_param mpll_param; - int result; - - result = atomctrl_get_memory_pll_dividers_si(hwmgr, - memory_clock, &mpll_param, strobe_mode); - PP_ASSERT_WITH_CODE( - !result, - "Error retrieving Memory Clock Parameters from VBIOS.", - return result); - - /* MPLL_FUNC_CNTL setup*/ - mpll_func_cntl = PHM_SET_FIELD(mpll_func_cntl, MPLL_FUNC_CNTL, BWCTRL, - mpll_param.bw_ctrl); - - /* MPLL_FUNC_CNTL_1 setup*/ - mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, - MPLL_FUNC_CNTL_1, CLKF, - mpll_param.mpll_fb_divider.cl_kf); - mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, - MPLL_FUNC_CNTL_1, CLKFRAC, - mpll_param.mpll_fb_divider.clk_frac); - mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, - MPLL_FUNC_CNTL_1, VCO_MODE, - mpll_param.vco_mode); - - /* MPLL_AD_FUNC_CNTL setup*/ - mpll_ad_func_cntl = PHM_SET_FIELD(mpll_ad_func_cntl, - MPLL_AD_FUNC_CNTL, YCLK_POST_DIV, - mpll_param.mpll_post_divider); - - if (data->is_memory_gddr5) { - /* MPLL_DQ_FUNC_CNTL setup*/ - mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, - MPLL_DQ_FUNC_CNTL, YCLK_SEL, - mpll_param.yclk_sel); - mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, - MPLL_DQ_FUNC_CNTL, YCLK_POST_DIV, - mpll_param.mpll_post_divider); - } - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MemorySpreadSpectrumSupport)) { - /* - ************************************ - Fref = Reference Frequency - NF = Feedback divider ratio - NR = Reference divider ratio - Fnom = Nominal VCO output frequency = Fref * NF / NR - Fs = Spreading Rate - D = Percentage down-spread / 2 - Fint = Reference input frequency to PFD = Fref / NR - NS = Spreading rate divider ratio = int(Fint / (2 * Fs)) - CLKS = NS - 1 = ISS_STEP_NUM[11:0] - NV = D * Fs / Fnom * 4 * ((Fnom/Fref * NR) ^ 2) - CLKV = 65536 * NV = ISS_STEP_SIZE[25:0] - ************************************* - */ - pp_atomctrl_internal_ss_info ss_info; - uint32_t freq_nom; - uint32_t tmp; - uint32_t reference_clock = atomctrl_get_mpll_reference_clock(hwmgr); - - /* for GDDR5 for all modes and DDR3 */ - if (1 == mpll_param.qdr) - freq_nom = memory_clock * 4 * (1 << mpll_param.mpll_post_divider); - else - freq_nom = memory_clock * 2 * (1 << mpll_param.mpll_post_divider); - - /* tmp = (freq_nom / reference_clock * reference_divider) ^ 2 Note: S.I. 
reference_divider = 1*/ - tmp = (freq_nom / reference_clock); - tmp = tmp * tmp; - - if (0 == atomctrl_get_memory_clock_spread_spectrum(hwmgr, freq_nom, &ss_info)) { - /* ss_info.speed_spectrum_percentage -- in unit of 0.01% */ - /* ss.Info.speed_spectrum_rate -- in unit of khz */ - /* CLKS = reference_clock / (2 * speed_spectrum_rate * reference_divider) * 10 */ - /* = reference_clock * 5 / speed_spectrum_rate */ - uint32_t clks = reference_clock * 5 / ss_info.speed_spectrum_rate; - - /* CLKV = 65536 * speed_spectrum_percentage / 2 * spreadSpecrumRate / freq_nom * 4 / 100000 * ((freq_nom / reference_clock) ^ 2) */ - /* = 131 * speed_spectrum_percentage * speed_spectrum_rate / 100 * ((freq_nom / reference_clock) ^ 2) / freq_nom */ - uint32_t clkv = - (uint32_t)((((131 * ss_info.speed_spectrum_percentage * - ss_info.speed_spectrum_rate) / 100) * tmp) / freq_nom); - - mpll_ss1 = PHM_SET_FIELD(mpll_ss1, MPLL_SS1, CLKV, clkv); - mpll_ss2 = PHM_SET_FIELD(mpll_ss2, MPLL_SS2, CLKS, clks); - } - } - - /* MCLK_PWRMGT_CNTL setup */ - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, DLL_SPEED, mpll_param.dll_speed); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK0_PDNB, dllStateOn); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK1_PDNB, dllStateOn); - - /* Save the result data to outpupt memory level structure */ - mclk->MclkFrequency = memory_clock; - mclk->MpllFuncCntl = mpll_func_cntl; - mclk->MpllFuncCntl_1 = mpll_func_cntl_1; - mclk->MpllFuncCntl_2 = mpll_func_cntl_2; - mclk->MpllAdFuncCntl = mpll_ad_func_cntl; - mclk->MpllDqFuncCntl = mpll_dq_func_cntl; - mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; - mclk->DllCntl = dll_cntl; - mclk->MpllSs1 = mpll_ss1; - mclk->MpllSs2 = mpll_ss2; - - return 0; -} - -static uint8_t tonga_get_mclk_frequency_ratio(uint32_t memory_clock, - bool strobe_mode) -{ - uint8_t mc_para_index; - - if (strobe_mode) { - if (memory_clock < 12500) - mc_para_index = 0x00; - else if (memory_clock > 47500) - mc_para_index = 0x0f; - else - mc_para_index = (uint8_t)((memory_clock - 10000) / 2500); - } else { - if (memory_clock < 65000) - mc_para_index = 0x00; - else if (memory_clock > 135000) - mc_para_index = 0x0f; - else - mc_para_index = (uint8_t)((memory_clock - 60000) / 5000); - } - - return mc_para_index; -} - -static uint8_t tonga_get_ddr3_mclk_frequency_ratio(uint32_t memory_clock) -{ - uint8_t mc_para_index; - - if (memory_clock < 10000) - mc_para_index = 0; - else if (memory_clock >= 80000) - mc_para_index = 0x0f; - else - mc_para_index = (uint8_t)((memory_clock - 10000) / 5000 + 1); - - return mc_para_index; -} - - -static int tonga_populate_single_memory_level( - struct pp_hwmgr *hwmgr, - uint32_t memory_clock, - SMU72_Discrete_MemoryLevel *memory_level - ) -{ - uint32_t mvdd = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - int result = 0; - bool dll_state_on; - struct cgs_display_info info = {0}; - uint32_t mclk_edc_wr_enable_threshold = 40000; - uint32_t mclk_stutter_mode_threshold = 30000; - uint32_t mclk_edc_enable_threshold = 40000; - uint32_t mclk_strobe_mode_threshold = 40000; - - if (NULL != pptable_info->vdd_dep_on_mclk) { - result = tonga_get_dependency_volt_by_clk(hwmgr, - pptable_info->vdd_dep_on_mclk, - memory_clock, - &memory_level->MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE( - !result, - "can not find MinVddc voltage value from memory VDDC " - "voltage 
dependency table", - return result); - } - - if (data->mvdd_control == SMU7_VOLTAGE_CONTROL_NONE) - memory_level->MinMvdd = data->vbios_boot_state.mvdd_bootup_value; - else - memory_level->MinMvdd = mvdd; - - memory_level->EnabledForThrottle = 1; - memory_level->EnabledForActivity = 0; - memory_level->UpHyst = 0; - memory_level->DownHyst = 100; - memory_level->VoltageDownHyst = 0; - - /* Indicates maximum activity level for this performance level.*/ - memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; - memory_level->StutterEnable = 0; - memory_level->StrobeEnable = 0; - memory_level->EdcReadEnable = 0; - memory_level->EdcWriteEnable = 0; - memory_level->RttEnable = 0; - - /* default set to low watermark. Highest level will be set to high later.*/ - memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - cgs_get_active_displays_info(hwmgr->device, &info); - data->display_timing.num_existing_displays = info.display_count; - - if ((mclk_stutter_mode_threshold != 0) && - (memory_clock <= mclk_stutter_mode_threshold) && - (!data->is_uvd_enabled) - && (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, STUTTER_ENABLE) & 0x1) - && (data->display_timing.num_existing_displays <= 2) - && (data->display_timing.num_existing_displays != 0)) - memory_level->StutterEnable = 1; - - /* decide strobe mode*/ - memory_level->StrobeEnable = (mclk_strobe_mode_threshold != 0) && - (memory_clock <= mclk_strobe_mode_threshold); - - /* decide EDC mode and memory clock ratio*/ - if (data->is_memory_gddr5) { - memory_level->StrobeRatio = tonga_get_mclk_frequency_ratio(memory_clock, - memory_level->StrobeEnable); - - if ((mclk_edc_enable_threshold != 0) && - (memory_clock > mclk_edc_enable_threshold)) { - memory_level->EdcReadEnable = 1; - } - - if ((mclk_edc_wr_enable_threshold != 0) && - (memory_clock > mclk_edc_wr_enable_threshold)) { - memory_level->EdcWriteEnable = 1; - } - - if (memory_level->StrobeEnable) { - if (tonga_get_mclk_frequency_ratio(memory_clock, 1) >= - ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC7) >> 16) & 0xf)) { - dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 1 : 0; - } else { - dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC6) >> 1) & 0x1) ? 1 : 0; - } - - } else { - dll_state_on = data->dll_default_on; - } - } else { - memory_level->StrobeRatio = - tonga_get_ddr3_mclk_frequency_ratio(memory_clock); - dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 
1 : 0; - } - - result = tonga_calculate_mclk_params(hwmgr, - memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on); - - if (!result) { - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MinMvdd); - /* MCLK frequency in units of 10KHz*/ - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkFrequency); - /* Indicates maximum activity level for this performance level.*/ - CONVERT_FROM_HOST_TO_SMC_US(memory_level->ActivityLevel); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_1); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_2); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllAdFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllDqFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkPwrmgtCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->DllCntl); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs1); - CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs2); - } - - return result; -} - -int tonga_populate_all_memory_levels(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - struct smu7_dpm_table *dpm_table = &data->dpm_table; - int result; - - /* populate MCLK dpm table to SMU7 */ - uint32_t level_array_address = - smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, MemoryLevel); - uint32_t level_array_size = - sizeof(SMU72_Discrete_MemoryLevel) * - SMU72_MAX_LEVELS_MEMORY; - SMU72_Discrete_MemoryLevel *levels = - smu_data->smc_state_table.MemoryLevel; - uint32_t i; - - memset(levels, 0x00, level_array_size); - - for (i = 0; i < dpm_table->mclk_table.count; i++) { - PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), - "can not populate memory level as memory clock is zero", - return -EINVAL); - result = tonga_populate_single_memory_level( - hwmgr, - dpm_table->mclk_table.dpm_levels[i].value, - &(smu_data->smc_state_table.MemoryLevel[i])); - if (result) - return result; - } - - /* Only enable level 0 for now.*/ - smu_data->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; - - /* - * in order to prevent MC activity from stutter mode to push DPM up. - * the UVD change complements this by putting the MCLK in a higher state - * by default such that we are not effected by up threshold or and MCLK DPM latency. 
- */ - smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); - - smu_data->smc_state_table.MemoryDpmLevelCount = (uint8_t)dpm_table->mclk_table.count; - data->dpm_level_enable_mask.mclk_dpm_enable_mask = phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); - /* set highest level watermark to high*/ - smu_data->smc_state_table.MemoryLevel[dpm_table->mclk_table.count-1].DisplayWatermark = PPSMC_DISPLAY_WATERMARK_HIGH; - - /* level count will send to smc once at init smc table and never change*/ - result = smu7_copy_bytes_to_smc(hwmgr, - level_array_address, (uint8_t *)levels, (uint32_t)level_array_size, - SMC_RAM_END); - - return result; -} - -static int tonga_populate_mvdd_value(struct pp_hwmgr *hwmgr, - uint32_t mclk, SMIO_Pattern *smio_pattern) -{ - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint32_t i = 0; - - if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { - /* find mvdd value which clock is more than request */ - for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { - if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { - /* Always round to higher voltage. */ - smio_pattern->Voltage = - data->mvdd_voltage_table.entries[i].value; - break; - } - } - - PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, - "MVDD Voltage is outside the supported range.", - return -EINVAL); - } else { - return -EINVAL; - } - - return 0; -} - - -static int tonga_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result = 0; - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct pp_atomctrl_clock_dividers_vi dividers; - - SMIO_Pattern voltage_level; - uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; - uint32_t spll_func_cntl_2 = data->clock_registers.vCG_SPLL_FUNC_CNTL_2; - uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; - uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; - - /* The ACPI state should not do DPM on DC (or ever).*/ - table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - - table->ACPILevel.MinVoltage = - smu_data->smc_state_table.GraphicsLevel[0].MinVoltage; - - /* assign zero for now*/ - table->ACPILevel.SclkFrequency = atomctrl_get_reference_clock(hwmgr); - - /* get the engine clock dividers for this clock value*/ - result = atomctrl_get_engine_pll_dividers_vi(hwmgr, - table->ACPILevel.SclkFrequency, &dividers); - - PP_ASSERT_WITH_CODE(result == 0, - "Error retrieving Engine Clock dividers from VBIOS.", - return result); - - /* divider ID for required SCLK*/ - table->ACPILevel.SclkDid = (uint8_t)dividers.pll_post_divider; - table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - table->ACPILevel.DeepSleepDivId = 0; - - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, - SPLL_PWRON, 0); - spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, - SPLL_RESET, 1); - spll_func_cntl_2 = PHM_SET_FIELD(spll_func_cntl_2, CG_SPLL_FUNC_CNTL_2, - SCLK_MUX_SEL, 4); - - table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; - table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; - table->ACPILevel.CgSpllFuncCntl3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; - table->ACPILevel.CgSpllFuncCntl4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; -
table->ACPILevel.SpllSpreadSpectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; - table->ACPILevel.SpllSpreadSpectrum2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; - table->ACPILevel.CcPwrDynRm = 0; - table->ACPILevel.CcPwrDynRm1 = 0; - - - /* For various features to be enabled/disabled while this level is active.*/ - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); - /* SCLK frequency in units of 10KHz*/ - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl2); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl3); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl4); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum2); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); - CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); - - /* table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases;*/ - table->MemoryACPILevel.MinVoltage = - smu_data->smc_state_table.MemoryLevel[0].MinVoltage; - - /* CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage);*/ - - if (0 == tonga_populate_mvdd_value(hwmgr, 0, &voltage_level)) - table->MemoryACPILevel.MinMvdd = - PP_HOST_TO_SMC_UL(voltage_level.Voltage * VOLTAGE_SCALE); - else - table->MemoryACPILevel.MinMvdd = 0; - - /* Force reset on DLL*/ - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK0_RESET, 0x1); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK1_RESET, 0x1); - - /* Disable DLL in ACPIState*/ - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK0_PDNB, 0); - mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, - MCLK_PWRMGT_CNTL, MRDCK1_PDNB, 0); - - /* Enable DLL bypass signal*/ - dll_cntl = PHM_SET_FIELD(dll_cntl, - DLL_CNTL, MRDCK0_BYPASS, 0); - dll_cntl = PHM_SET_FIELD(dll_cntl, - DLL_CNTL, MRDCK1_BYPASS, 0); - - table->MemoryACPILevel.DllCntl = - PP_HOST_TO_SMC_UL(dll_cntl); - table->MemoryACPILevel.MclkPwrmgtCntl = - PP_HOST_TO_SMC_UL(mclk_pwrmgt_cntl); - table->MemoryACPILevel.MpllAdFuncCntl = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_AD_FUNC_CNTL); - table->MemoryACPILevel.MpllDqFuncCntl = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_DQ_FUNC_CNTL); - table->MemoryACPILevel.MpllFuncCntl = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL); - table->MemoryACPILevel.MpllFuncCntl_1 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_1); - table->MemoryACPILevel.MpllFuncCntl_2 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_2); - table->MemoryACPILevel.MpllSs1 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS1); - table->MemoryACPILevel.MpllSs2 = - PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS2); - - table->MemoryACPILevel.EnabledForThrottle = 0; - table->MemoryACPILevel.EnabledForActivity = 0; - table->MemoryACPILevel.UpHyst = 0; - table->MemoryACPILevel.DownHyst = 100; - table->MemoryACPILevel.VoltageDownHyst = 0; - /* Indicates maximum activity level for this performance level.*/ - table->MemoryACPILevel.ActivityLevel = - PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); - - table->MemoryACPILevel.StutterEnable = 0; - table->MemoryACPILevel.StrobeEnable = 0; - table->MemoryACPILevel.EdcReadEnable = 0; - table->MemoryACPILevel.EdcWriteEnable = 0; - table->MemoryACPILevel.RttEnable = 0; - - return result; -} - -static int 
tonga_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result = 0; - - uint8_t count; - pp_atomctrl_clock_dividers_vi dividers; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - pptable_info->mm_dep_table; - - table->UvdLevelCount = (uint8_t) (mm_table->count); - table->UvdBootLevel = 0; - - for (count = 0; count < table->UvdLevelCount; count++) { - table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk; - table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; - table->UvdLevel[count].MinVoltage.Vddc = - phm_get_voltage_index(pptable_info->vddc_lookup_table, - mm_table->entries[count].vddc); - table->UvdLevel[count].MinVoltage.VddGfx = - (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ? - phm_get_voltage_index(pptable_info->vddgfx_lookup_table, - mm_table->entries[count].vddgfx) : 0; - table->UvdLevel[count].MinVoltage.Vddci = - phm_get_voltage_id(&data->vddci_voltage_table, - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - table->UvdLevel[count].MinVoltage.Phases = 1; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi( - hwmgr, - table->UvdLevel[count].VclkFrequency, - &dividers); - - PP_ASSERT_WITH_CODE((!result), - "can not find divide id for Vclk clock", - return result); - - table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider; - - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->UvdLevel[count].DclkFrequency, &dividers); - PP_ASSERT_WITH_CODE((!result), - "can not find divide id for Dclk clock", - return result); - - table->UvdLevel[count].DclkDivider = - (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); - } - - return result; - -} - -static int tonga_populate_smc_vce_level(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result = 0; - - uint8_t count; - pp_atomctrl_clock_dividers_vi dividers; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - pptable_info->mm_dep_table; - - table->VceLevelCount = (uint8_t) (mm_table->count); - table->VceBootLevel = 0; - - for (count = 0; count < table->VceLevelCount; count++) { - table->VceLevel[count].Frequency = - mm_table->entries[count].eclk; - table->VceLevel[count].MinVoltage.Vddc = - phm_get_voltage_index(pptable_info->vddc_lookup_table, - mm_table->entries[count].vddc); - table->VceLevel[count].MinVoltage.VddGfx = - (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ?
- phm_get_voltage_index(pptable_info->vddgfx_lookup_table, - mm_table->entries[count].vddgfx) : 0; - table->VceLevel[count].MinVoltage.Vddci = - phm_get_voltage_id(&data->vddci_voltage_table, - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - table->VceLevel[count].MinVoltage.Phases = 1; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->VceLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((!result), - "can not find divide id for VCE engine clock", - return result); - - table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency); - } - - return result; -} - -static int tonga_populate_smc_acp_level(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result = 0; - uint8_t count; - pp_atomctrl_clock_dividers_vi dividers; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - pptable_info->mm_dep_table; - - table->AcpLevelCount = (uint8_t) (mm_table->count); - table->AcpBootLevel = 0; - - for (count = 0; count < table->AcpLevelCount; count++) { - table->AcpLevel[count].Frequency = - pptable_info->mm_dep_table->entries[count].aclk; - table->AcpLevel[count].MinVoltage.Vddc = - phm_get_voltage_index(pptable_info->vddc_lookup_table, - mm_table->entries[count].vddc); - table->AcpLevel[count].MinVoltage.VddGfx = - (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ? - phm_get_voltage_index(pptable_info->vddgfx_lookup_table, - mm_table->entries[count].vddgfx) : 0; - table->AcpLevel[count].MinVoltage.Vddci = - phm_get_voltage_id(&data->vddci_voltage_table, - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - table->AcpLevel[count].MinVoltage.Phases = 1; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->AcpLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((!result), - "can not find divide id for engine clock", return result); - - table->AcpLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->AcpLevel[count].Frequency); - } - - return result; -} - -static int tonga_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result = 0; - uint8_t count; - pp_atomctrl_clock_dividers_vi dividers; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - pptable_info->mm_dep_table; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t) (mm_table->count); - - for (count = 0; count < table->SamuLevelCount; count++) { - /* not sure whether we need evclk or not */ - table->SamuLevel[count].Frequency = - pptable_info->mm_dep_table->entries[count].samclock; - table->SamuLevel[count].MinVoltage.Vddc = - phm_get_voltage_index(pptable_info->vddc_lookup_table, - mm_table->entries[count].vddc); - table->SamuLevel[count].MinVoltage.VddGfx = - (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ?
- phm_get_voltage_index(pptable_info->vddgfx_lookup_table, - mm_table->entries[count].vddgfx) : 0; - table->SamuLevel[count].MinVoltage.Vddci = - phm_get_voltage_id(&data->vddci_voltage_table, - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - table->SamuLevel[count].MinVoltage.Phases = 1; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((!result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - } - - return result; -} - -static int tonga_populate_memory_timing_parameters( - struct pp_hwmgr *hwmgr, - uint32_t engine_clock, - uint32_t memory_clock, - struct SMU72_Discrete_MCArbDramTimingTableEntry *arb_regs - ) -{ - uint32_t dramTiming; - uint32_t dramTiming2; - uint32_t burstTime; - int result; - - result = atomctrl_set_engine_dram_timings_rv770(hwmgr, - engine_clock, memory_clock); - - PP_ASSERT_WITH_CODE(result == 0, - "Error calling VBIOS to set DRAM_TIMING.", return result); - - dramTiming = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); - dramTiming2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); - burstTime = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); - - arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dramTiming); - arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dramTiming2); - arb_regs->McArbBurstTime = (uint8_t)burstTime; - - return 0; -} - -/** - * Setup parameters for the MC ARB. - * - * @param hwmgr the address of the powerplay hardware manager. - * @return always 0 - * This function is to be called from the SetPowerState table. - */ -static int tonga_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - int result = 0; - SMU72_Discrete_MCArbDramTimingTable arb_regs; - uint32_t i, j; - - memset(&arb_regs, 0x00, sizeof(SMU72_Discrete_MCArbDramTimingTable)); - - for (i = 0; i < data->dpm_table.sclk_table.count; i++) { - for (j = 0; j < data->dpm_table.mclk_table.count; j++) { - result = tonga_populate_memory_timing_parameters - (hwmgr, data->dpm_table.sclk_table.dpm_levels[i].value, - data->dpm_table.mclk_table.dpm_levels[j].value, - &arb_regs.entries[i][j]); - - if (result) - break; - } - } - - if (!result) { - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.arb_table_start, - (uint8_t *)&arb_regs, - sizeof(SMU72_Discrete_MCArbDramTimingTable), - SMC_RAM_END - ); - } - - return result; -} - -static int tonga_populate_smc_boot_level(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - table->GraphicsBootLevel = 0; - table->MemoryBootLevel = 0; - - /* find boot level from dpm table*/ - result = phm_find_boot_level(&(data->dpm_table.sclk_table), - data->vbios_boot_state.sclk_bootup_value, - (uint32_t *)&(smu_data->smc_state_table.GraphicsBootLevel)); - - if (result != 0) { - smu_data->smc_state_table.GraphicsBootLevel = 0; - pr_err("[powerplay] VBIOS did not find boot engine " - "clock value in dependency table. 
" - "Using Graphics DPM level 0 !"); - result = 0; - } - - result = phm_find_boot_level(&(data->dpm_table.mclk_table), - data->vbios_boot_state.mclk_bootup_value, - (uint32_t *)&(smu_data->smc_state_table.MemoryBootLevel)); - - if (result != 0) { - smu_data->smc_state_table.MemoryBootLevel = 0; - pr_err("[powerplay] VBIOS did not find boot " - "engine clock value in dependency table." - "Using Memory DPM level 0 !"); - result = 0; - } - - table->BootVoltage.Vddc = - phm_get_voltage_id(&(data->vddc_voltage_table), - data->vbios_boot_state.vddc_bootup_value); - table->BootVoltage.VddGfx = - phm_get_voltage_id(&(data->vddgfx_voltage_table), - data->vbios_boot_state.vddgfx_bootup_value); - table->BootVoltage.Vddci = - phm_get_voltage_id(&(data->vddci_voltage_table), - data->vbios_boot_state.vddci_bootup_value); - table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value; - - CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); - - return result; -} - -static int tonga_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) -{ - uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks, - volt_with_cks, value; - uint16_t clock_freq_u16; - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2, - volt_offset = 0; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = - table_info->vdd_dep_on_sclk; - uint32_t hw_revision, dev_id; - struct cgs_system_info sys_info = {0}; - - stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; - - sys_info.size = sizeof(struct cgs_system_info); - - sys_info.info_id = CGS_SYSTEM_INFO_PCIE_REV; - cgs_query_system_info(hwmgr->device, &sys_info); - hw_revision = (uint32_t)sys_info.value; - - sys_info.info_id = CGS_SYSTEM_INFO_PCIE_DEV; - cgs_query_system_info(hwmgr->device, &sys_info); - dev_id = (uint32_t)sys_info.value; - - /* Read SMU_Eefuse to read and calculate RO and determine - * if the part is SS or FF. if RO >= 1660MHz, part is FF. 
- */ - efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (146 * 4)); - efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixSMU_EFUSE_0 + (148 * 4)); - efuse &= 0xFF000000; - efuse = efuse >> 24; - efuse2 &= 0xF; - - if (efuse2 == 1) - ro = (2300 - 1350) * efuse / 255 + 1350; - else - ro = (2500 - 1000) * efuse / 255 + 1000; - - if (ro >= 1660) - type = 0; - else - type = 1; - - /* Populate Stretch amount */ - smu_data->smc_state_table.ClockStretcherAmount = stretch_amount; - - - /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ - for (i = 0; i < sclk_table->count; i++) { - smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= - sclk_table->entries[i].cks_enable << i; - if (ASICID_IS_TONGA_P(dev_id, hw_revision)) { - volt_without_cks = (uint32_t)((7732 + 60 - ro - 20838 * - (sclk_table->entries[i].clk/100) / 10000) * 1000 / - (8730 - (5301 * (sclk_table->entries[i].clk/100) / 1000))); - volt_with_cks = (uint32_t)((5250 + 51 - ro - 2404 * - (sclk_table->entries[i].clk/100) / 100000) * 1000 / - (6146 - (3193 * (sclk_table->entries[i].clk/100) / 1000))); - } else { - volt_without_cks = (uint32_t)((14041 * - (sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 / - (4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000))); - volt_with_cks = (uint32_t)((13946 * - (sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 / - (3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000))); - } - if (volt_without_cks >= volt_with_cks) - volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + - sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); - smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; - } - - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - STRETCH_ENABLE, 0x0); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x1); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - staticEnable, 0x1); - PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, - masterReset, 0x0); - - /* Populate CKS Lookup Table */ - if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) - stretch_amount2 = 0; - else if (stretch_amount == 3 || stretch_amount == 4) - stretch_amount2 = 1; - else { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ClockStretcher); - PP_ASSERT_WITH_CODE(false, - "Stretch Amount in PPTable not supported\n", - return -EINVAL); - } - - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL); - value &= 0xFFC2FF87; - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq = - tonga_clock_stretcher_lookup_table[stretch_amount2][0]; - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq = - tonga_clock_stretcher_lookup_table[stretch_amount2][1]; - clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(smu_data->smc_state_table. - GraphicsLevel[smu_data->smc_state_table.GraphicsDpmLevelCount - 1]. - SclkFrequency) / 100); - if (tonga_clock_stretcher_lookup_table[stretch_amount2][0] < - clock_freq_u16 && - tonga_clock_stretcher_lookup_table[stretch_amount2][1] > - clock_freq_u16) { - /* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */ - value |= (tonga_clock_stretcher_lookup_table[stretch_amount2][3]) << 16; - /* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */ - value |= (tonga_clock_stretcher_lookup_table[stretch_amount2][2]) << 18; - /* Program PWR_CKS_CNTL. 
CKS_STRETCH_AMOUNT */ - value |= (tonga_clock_stretch_amount_conversion - [tonga_clock_stretcher_lookup_table[stretch_amount2][3]] - [stretch_amount]) << 3; - } - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. - CKS_LOOKUPTableEntry[0].minFreq); - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. - CKS_LOOKUPTableEntry[0].maxFreq); - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting = - tonga_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F; - smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |= - (tonga_clock_stretcher_lookup_table[stretch_amount2][3]) << 7; - - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL, value); - - /* Populate DDT Lookup Table */ - for (i = 0; i < 4; i++) { - /* Assign the minimum and maximum VID stored - * in the last row of Clock Stretcher Voltage Table. - */ - smu_data->smc_state_table.ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].minVID = - (uint8_t) tonga_clock_stretcher_ddt_table[type][i][2]; - smu_data->smc_state_table.ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].maxVID = - (uint8_t) tonga_clock_stretcher_ddt_table[type][i][3]; - /* Loop through each SCLK and check the frequency - * to see if it lies within the frequency for clock stretcher. - */ - for (j = 0; j < smu_data->smc_state_table.GraphicsDpmLevelCount; j++) { - cks_setting = 0; - clock_freq = PP_SMC_TO_HOST_UL( - smu_data->smc_state_table.GraphicsLevel[j].SclkFrequency); - /* Check the allowed frequency against the sclk level[j]. - * Sclk's endianness has already been converted, - * and it's in 10Khz unit, - * as opposed to Data table, which is in Mhz unit. - */ - if (clock_freq >= tonga_clock_stretcher_ddt_table[type][i][0] * 100) { - cks_setting |= 0x2; - if (clock_freq < tonga_clock_stretcher_ddt_table[type][i][1] * 100) - cks_setting |= 0x1; - } - smu_data->smc_state_table.ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].setting |= cks_setting << (j * 2); - } - CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table. - ClockStretcherDataTable. - ClockStretcherDataTableEntry[i].setting); - } - - value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL); - value &= 0xFFFFFFFE; - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixPWR_CKS_CNTL, value); - - return 0; -} - -/** - * Populates the SMC VRConfig field in DPM table. 
- * - * @param hwmgr the address of the hardware manager - * @param table the SMC DPM table structure to be populated - * @return always 0 - */ -static int tonga_populate_vr_config(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint16_t config; - - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vdd_gfx_control) { - /* Splitted mode */ - config = VR_SVI2_PLANE_1; - table->VRConfig |= (config<<VRCONF_VDDGFX_SHIFT); - - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { - config = VR_SVI2_PLANE_2; - table->VRConfig |= config; - } else { - pr_err("VDDC and VDDGFX should " - "be both on SVI2 control in splitted mode !\n"); - } - } else { - /* Merged mode */ - config = VR_MERGED_WITH_VDDC; - table->VRConfig |= (config<<VRCONF_VDDGFX_SHIFT); - - /* Set Vddc Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { - config = VR_SVI2_PLANE_1; - table->VRConfig |= config; - } else { - pr_err("VDDC should be on " - "SVI2 control in merged mode !\n"); - } - } - - /* Set Vddci Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { - config = VR_SVI2_PLANE_2; /* only in merged mode */ - table->VRConfig |= (config<<VRCONF_VDDCI_SHIFT); - } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { - config = VR_SMIO_PATTERN_1; - table->VRConfig |= (config<<VRCONF_VDDCI_SHIFT); - } - - /* Set Mvdd Voltage Controller */ - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { - config = VR_SMIO_PATTERN_2; - table->VRConfig |= (config<<VRCONF_MVDD_SHIFT); - } - - return 0; -} - - -/** - * Initialize the ARB DRAM timing table's index field. - * - * @param hwmgr the address of the powerplay hardware manager. - * @return always 0 - */ -static int tonga_init_arb_table_index(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t tmp; - int result; - - /* - * This is a read-modify-write on the first byte of the ARB table. - * The first byte in the SMU72_Discrete_MCArbDramTimingTable structure - * is the field 'current'. - * This solution is ugly, but we never write the whole table only - * individual fields in it. - * In reality this field should not be in that structure - * but in a soft register. 
- */ - result = smu7_read_smc_sram_dword(hwmgr, - smu_data->smu7_data.arb_table_start, &tmp, SMC_RAM_END); - - if (result != 0) - return result; - - tmp &= 0x00FFFFFF; - tmp |= ((uint32_t)MC_CG_ARB_FREQ_F1) << 24; - - return smu7_write_smc_sram_dword(hwmgr, - smu_data->smu7_data.arb_table_start, tmp, SMC_RAM_END); -} - - -static int tonga_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; - SMU72_Discrete_DpmTable *dpm_table = &(smu_data->smc_state_table); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; - int i, j, k; - const uint16_t *pdef1, *pdef2; - - dpm_table->DefaultTdp = PP_HOST_TO_SMC_US( - (uint16_t)(cac_dtp_table->usTDP * 256)); - dpm_table->TargetTdp = PP_HOST_TO_SMC_US( - (uint16_t)(cac_dtp_table->usConfigurableTDP * 256)); - - PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, - "Target Operating Temp is out of Range !", - ); - - dpm_table->GpuTjMax = (uint8_t)(cac_dtp_table->usTargetOperatingTemp); - dpm_table->GpuTjHyst = 8; - - dpm_table->DTEAmbientTempBase = defaults->dte_ambient_temp_base; - - dpm_table->BAPM_TEMP_GRADIENT = - PP_HOST_TO_SMC_UL(defaults->bapm_temp_gradient); - pdef1 = defaults->bapmti_r; - pdef2 = defaults->bapmti_rc; - - for (i = 0; i < SMU72_DTE_ITERATIONS; i++) { - for (j = 0; j < SMU72_DTE_SOURCES; j++) { - for (k = 0; k < SMU72_DTE_SINKS; k++) { - dpm_table->BAPMTI_R[i][j][k] = - PP_HOST_TO_SMC_US(*pdef1); - dpm_table->BAPMTI_RC[i][j][k] = - PP_HOST_TO_SMC_US(*pdef2); - pdef1++; - pdef2++; - } - } - } - - return 0; -} - -static int tonga_populate_svi_load_line(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; - - smu_data->power_tune_table.SviLoadLineEn = defaults->svi_load_line_en; - smu_data->power_tune_table.SviLoadLineVddC = defaults->svi_load_line_vddC; - smu_data->power_tune_table.SviLoadLineTrimVddC = 3; - smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; - - return 0; -} - -static int tonga_populate_tdc_limit(struct pp_hwmgr *hwmgr) -{ - uint16_t tdc_limit; - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - /* TDC number of fraction bits are changed from 8 to 7 - * for Fiji as requested by SMC team - */ - tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 256); - smu_data->power_tune_table.TDC_VDDC_PkgLimit = - CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); - smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = - defaults->tdc_vddc_throttle_release_limit_perc; - smu_data->power_tune_table.TDC_MAWt = defaults->tdc_mawt; - - return 0; -} - -static int tonga_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; - uint32_t temp; - - if (smu7_read_smc_sram_dword(hwmgr, - fuse_table_offset + - offsetof(SMU72_Discrete_PmFuses, TdcWaterfallCtl), - (uint32_t *)&temp, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to read 
PmFuses.DW6 " - "(SviLoadLineEn) from SMC Failed !", - return -EINVAL); - else - smu_data->power_tune_table.TdcWaterfallCtl = defaults->tdc_waterfall_ctl; - - return 0; -} - -static int tonga_populate_temperature_scaler(struct pp_hwmgr *hwmgr) -{ - int i; - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - - /* Currently not used. Set all to zero. */ - for (i = 0; i < 16; i++) - smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; - - return 0; -} - -static int tonga_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - - if ((hwmgr->thermal_controller.advanceFanControlParameters. - usFanOutputSensitivity & (1 << 15)) || - (hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity == 0)) - hwmgr->thermal_controller.advanceFanControlParameters. - usFanOutputSensitivity = hwmgr->thermal_controller. - advanceFanControlParameters.usDefaultFanOutputSensitivity; - - smu_data->power_tune_table.FuzzyFan_PwmSetDelta = - PP_HOST_TO_SMC_US(hwmgr->thermal_controller. - advanceFanControlParameters.usFanOutputSensitivity); - return 0; -} - -static int tonga_populate_gnb_lpml(struct pp_hwmgr *hwmgr) -{ - int i; - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - - /* Currently not used. Set all to zero. */ - for (i = 0; i < 16; i++) - smu_data->power_tune_table.GnbLPML[i] = 0; - - return 0; -} - -static int tonga_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; - uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; - struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; - - hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); - lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); - - smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = - CONVERT_FROM_HOST_TO_SMC_US(hi_sidd); - smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = - CONVERT_FROM_HOST_TO_SMC_US(lo_sidd); - - return 0; -} - -static int tonga_populate_pm_fuses(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t pm_fuse_table_offset; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment)) { - if (smu7_read_smc_sram_dword(hwmgr, - SMU72_FIRMWARE_HEADER_LOCATION + - offsetof(SMU72_Firmware_Header, PmFuseTable), - &pm_fuse_table_offset, SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to get pm_fuse_table_offset Failed !", - return -EINVAL); - - /* DW6 */ - if (tonga_populate_svi_load_line(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate SviLoadLine Failed !", - return -EINVAL); - /* DW7 */ - if (tonga_populate_tdc_limit(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TDCLimit Failed !", - return -EINVAL); - /* DW8 */ - if (tonga_populate_dw8(hwmgr, pm_fuse_table_offset)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate TdcWaterfallCtl Failed !", - return -EINVAL); - - /* DW9-DW12 */ - if (tonga_populate_temperature_scaler(hwmgr) != 0) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate LPMLTemperatureScaler Failed !", - return -EINVAL); - - /* DW13-DW14 */ - if (tonga_populate_fuzzy_fan(hwmgr)) - PP_ASSERT_WITH_CODE(false, - 
"Attempt to populate Fuzzy Fan " - "Control parameters Failed !", - return -EINVAL); - - /* DW15-DW18 */ - if (tonga_populate_gnb_lpml(hwmgr)) - PP_ASSERT_WITH_CODE(false, - "Attempt to populate GnbLPML Failed !", - return -EINVAL); - - /* DW20 */ - if (tonga_populate_bapm_vddc_base_leakage_sidd(hwmgr)) - PP_ASSERT_WITH_CODE( - false, - "Attempt to populate BapmVddCBaseLeakage " - "Hi and Lo Sidd Failed !", - return -EINVAL); - - if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, - (uint8_t *)&smu_data->power_tune_table, - sizeof(struct SMU72_Discrete_PmFuses), SMC_RAM_END)) - PP_ASSERT_WITH_CODE(false, - "Attempt to download PmFuseTable Failed !", - return -EINVAL); - } - return 0; -} - -static int tonga_populate_mc_reg_address(struct pp_hwmgr *hwmgr, - SMU72_Discrete_MCRegisters *mc_reg_table) -{ - const struct tonga_smumgr *smu_data = (struct tonga_smumgr *)hwmgr->smu_backend; - - uint32_t i, j; - - for (i = 0, j = 0; j < smu_data->mc_reg_table.last; j++) { - if (smu_data->mc_reg_table.validflag & 1<<j) { - PP_ASSERT_WITH_CODE( - i < SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE, - "Index of mc_reg_table->address[] array " - "out of boundary", - return -EINVAL); - mc_reg_table->address[i].s0 = - PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s0); - mc_reg_table->address[i].s1 = - PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s1); - i++; - } - } - - mc_reg_table->last = (uint8_t)i; - - return 0; -} - -/*convert register values from driver to SMC format */ -static void tonga_convert_mc_registers( - const struct tonga_mc_reg_entry *entry, - SMU72_Discrete_MCRegisterSet *data, - uint32_t num_entries, uint32_t valid_flag) -{ - uint32_t i, j; - - for (i = 0, j = 0; j < num_entries; j++) { - if (valid_flag & 1<<j) { - data->value[i] = PP_HOST_TO_SMC_UL(entry->mc_data[j]); - i++; - } - } -} - -static int tonga_convert_mc_reg_table_entry_to_smc( - struct pp_hwmgr *hwmgr, - const uint32_t memory_clock, - SMU72_Discrete_MCRegisterSet *mc_reg_table_data - ) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t i = 0; - - for (i = 0; i < smu_data->mc_reg_table.num_entries; i++) { - if (memory_clock <= - smu_data->mc_reg_table.mc_reg_table_entry[i].mclk_max) { - break; - } - } - - if ((i == smu_data->mc_reg_table.num_entries) && (i > 0)) - --i; - - tonga_convert_mc_registers(&smu_data->mc_reg_table.mc_reg_table_entry[i], - mc_reg_table_data, smu_data->mc_reg_table.last, - smu_data->mc_reg_table.validflag); - - return 0; -} - -static int tonga_convert_mc_reg_table_to_smc(struct pp_hwmgr *hwmgr, - SMU72_Discrete_MCRegisters *mc_regs) -{ - int result = 0; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - int res; - uint32_t i; - - for (i = 0; i < data->dpm_table.mclk_table.count; i++) { - res = tonga_convert_mc_reg_table_entry_to_smc( - hwmgr, - data->dpm_table.mclk_table.dpm_levels[i].value, - &mc_regs->data[i] - ); - - if (0 != res) - result = res; - } - - return result; -} - -static int tonga_update_and_upload_mc_reg_table(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t address; - int32_t result; - - if (0 == (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) - return 0; - - - memset(&smu_data->mc_regs, 0, sizeof(SMU72_Discrete_MCRegisters)); - - result = tonga_convert_mc_reg_table_to_smc(hwmgr, &(smu_data->mc_regs)); - - if (result != 0) - return result; - - - address = 
smu_data->smu7_data.mc_reg_table_start + - (uint32_t)offsetof(SMU72_Discrete_MCRegisters, data[0]); - - return smu7_copy_bytes_to_smc( - hwmgr, address, - (uint8_t *)&smu_data->mc_regs.data[0], - sizeof(SMU72_Discrete_MCRegisterSet) * - data->dpm_table.mclk_table.count, - SMC_RAM_END); -} - -static int tonga_populate_initial_mc_reg_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - - memset(&smu_data->mc_regs, 0x00, sizeof(SMU72_Discrete_MCRegisters)); - result = tonga_populate_mc_reg_address(hwmgr, &(smu_data->mc_regs)); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize MCRegTable for the MC register addresses !", - return result;); - - result = tonga_convert_mc_reg_table_to_smc(hwmgr, &smu_data->mc_regs); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize MCRegTable for driver state !", - return result;); - - return smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.mc_reg_table_start, - (uint8_t *)&smu_data->mc_regs, sizeof(SMU72_Discrete_MCRegisters), SMC_RAM_END); -} - -static void tonga_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - if (table_info && - table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX && - table_info->cac_dtp_table->usPowerTuneDataSetID) - smu_data->power_tune_defaults = - &tonga_power_tune_data_set_array - [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; - else - smu_data->power_tune_defaults = &tonga_power_tune_data_set_array[0]; -} - -static void tonga_save_default_power_profile(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *data = (struct tonga_smumgr *)(hwmgr->smu_backend); - struct SMU72_Discrete_GraphicsLevel *levels = - data->smc_state_table.GraphicsLevel; - unsigned min_level = 1; - - hwmgr->default_gfx_power_profile.activity_threshold = - be16_to_cpu(levels[0].ActivityLevel); - hwmgr->default_gfx_power_profile.up_hyst = levels[0].UpHyst; - hwmgr->default_gfx_power_profile.down_hyst = levels[0].DownHyst; - hwmgr->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; - - hwmgr->default_compute_power_profile = hwmgr->default_gfx_power_profile; - hwmgr->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; - - /* Workaround compute SDMA instability: disable lowest SCLK - * DPM level. Optimize compute power profile: Use only highest - * 2 power levels (if more than 2 are available), Hysteresis: - * 0ms up, 5ms down - */ - if (data->smc_state_table.GraphicsDpmLevelCount > 2) - min_level = data->smc_state_table.GraphicsDpmLevelCount - 2; - else if (data->smc_state_table.GraphicsDpmLevelCount == 2) - min_level = 1; - else - min_level = 0; - hwmgr->default_compute_power_profile.min_sclk = - be32_to_cpu(levels[min_level].SclkFrequency); - hwmgr->default_compute_power_profile.up_hyst = 0; - hwmgr->default_compute_power_profile.down_hyst = 5; - - hwmgr->gfx_power_profile = hwmgr->default_gfx_power_profile; - hwmgr->compute_power_profile = hwmgr->default_compute_power_profile; -} - -/** - * Initializes the SMC table and uploads it - * - * @param hwmgr the address of the powerplay hardware manager. 
- * @param pInput the pointer to input data (PowerState) - * @return always 0 - */ -int tonga_init_smc_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - SMU72_Discrete_DpmTable *table = &(smu_data->smc_state_table); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - uint8_t i; - pp_atomctrl_gpio_pin_assignment gpio_pin_assignment; - - - memset(&(smu_data->smc_state_table), 0x00, sizeof(smu_data->smc_state_table)); - - tonga_initialize_power_tune_defaults(hwmgr); - - if (SMU7_VOLTAGE_CONTROL_NONE != data->voltage_control) - tonga_populate_smc_voltage_tables(hwmgr, table); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; - - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StepVddc)) - table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - - if (data->is_memory_gddr5) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; - - i = PHM_READ_FIELD(hwmgr->device, CC_MC_MAX_CHANNEL, NOOFCHAN); - - if (i == 1 || i == 0) - table->SystemFlags |= 0x40; - - if (data->ulv_supported && table_info->us_ulv_voltage_offset) { - result = tonga_populate_ulv_state(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize ULV state !", - return result;); - - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, - ixCG_ULV_PARAMETER, 0x40035); - } - - result = tonga_populate_smc_link_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize Link Level !", return result); - - result = tonga_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize Graphics Level !", return result); - - result = tonga_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize Memory Level !", return result); - - result = tonga_populate_smc_acpi_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize ACPI Level !", return result); - - result = tonga_populate_smc_vce_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize VCE Level !", return result); - - result = tonga_populate_smc_acp_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize ACP Level !", return result); - - result = tonga_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize SAMU Level !", return result); - - /* Since only the initial state is completely set up at this - * point (the other states are just copies of the boot state) we only - * need to populate the ARB settings for the initial state. 
- */ - result = tonga_program_memory_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to Write ARB settings for the initial state.", - return result;); - - result = tonga_populate_smc_uvd_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize UVD Level !", return result); - - result = tonga_populate_smc_boot_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize Boot Level !", return result); - - tonga_populate_bapm_parameters_in_dpm_table(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate BAPM Parameters !", return result); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ClockStretcher)) { - result = tonga_populate_clock_stretcher_data_table(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate Clock Stretcher Data Table !", - return result;); - } - table->GraphicsVoltageChangeEnable = 1; - table->GraphicsThermThrottleEnable = 1; - table->GraphicsInterval = 1; - table->VoltageInterval = 1; - table->ThermalInterval = 1; - table->TemperatureLimitHigh = - table_info->cac_dtp_table->usTargetOperatingTemp * - SMU7_Q88_FORMAT_CONVERSION_UNIT; - table->TemperatureLimitLow = - (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * - SMU7_Q88_FORMAT_CONVERSION_UNIT; - table->MemoryVoltageChangeEnable = 1; - table->MemoryInterval = 1; - table->VoltageResponseTime = 0; - table->PhaseResponseTime = 0; - table->MemoryThermThrottleEnable = 1; - - /* - * Cail reads current link status and reports it as cap (we cannot - * change this due to some previous issues we had) - * SMC drops the link status to lowest level after enabling - * DPM by PowerPlay. After pnp or toggling CF, driver gets reloaded again - * but this time Cail reads current link status which was set to low by - * SMC and reports it as cap to powerplay - * To avoid it, we set PCIeBootLinkLevel to highest dpm level - */ - PP_ASSERT_WITH_CODE((1 <= data->dpm_table.pcie_speed_table.count), - "There must be 1 or more PCIE levels defined in PPTable.", - return -EINVAL); - - table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); - - table->PCIeGenInterval = 1; - - result = tonga_populate_vr_config(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate VRConfig setting !", return result); - - table->ThermGpio = 17; - table->SclkStepSize = 0x4000; - - if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID, - &gpio_pin_assignment)) { - table->VRHotGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_RegulatorHot); - } else { - table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_RegulatorHot); - } - - if (atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID, - &gpio_pin_assignment)) { - table->AcDcGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition); - } else { - table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition); - } - - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_Falcon_QuickTransition); - - if (0) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_AutomaticDCTransition); - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_Falcon_QuickTransition); - } - - if (atomctrl_get_pp_assign_pin(hwmgr, - 
THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin_assignment)) { - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ThermalOutGPIO); - - table->ThermOutGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; - - table->ThermOutPolarity = - (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) & - (1 << gpio_pin_assignment.uc_gpio_pin_bit_shift))) ? 1 : 0; - - table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY; - - /* if required, combine VRHot/PCC with thermal out GPIO*/ - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_RegulatorHot) && - phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_CombinePCCWithThermalSignal)){ - table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT; - } - } else { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_ThermalOutGPIO); - - table->ThermOutGpio = 17; - table->ThermOutPolarity = 1; - table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE; - } - - for (i = 0; i < SMU72_MAX_ENTRIES_SMIO; i++) - table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); - - CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); - CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); - CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2); - CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); - CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); - CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); - CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); - - /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.dpm_table_start + offsetof(SMU72_Discrete_DpmTable, SystemFlags), - (uint8_t *)&(table->SystemFlags), - sizeof(SMU72_Discrete_DpmTable) - 3 * sizeof(SMU72_PIDController), - SMC_RAM_END); - - PP_ASSERT_WITH_CODE(!result, - "Failed to upload dpm data to SMC memory !", return result;); - - result = tonga_init_arb_table_index(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to upload arb data to SMC memory !", return result); - - tonga_populate_pm_fuses(hwmgr); - PP_ASSERT_WITH_CODE((!result), - "Failed to populate initialize pm fuses !", return result); - - result = tonga_populate_initial_mc_reg_table(hwmgr); - PP_ASSERT_WITH_CODE((!result), - "Failed to populate initialize MC Reg table !", return result); - - tonga_save_default_power_profile(hwmgr); - - return 0; -} - -/** -* Set up the fan table to control the fan using the SMC. -* @param hwmgr the address of the powerplay hardware manager. 
-* @param pInput the pointer to input data -* @param pOutput the pointer to output data -* @param pStorage the pointer to temporary storage -* @param Result the last failure code -* @return result from set temperature range routine -*/ -int tonga_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - SMU72_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; - uint32_t duty100; - uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; - uint16_t fdo_min, slope1, slope2; - uint32_t reference_clock; - int res; - uint64_t tmp64; - - if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl)) - return 0; - - if (hwmgr->thermal_controller.fanInfo.bNoFan) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - if (0 == smu_data->smu7_data.fan_table_start) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, - CG_FDO_CTRL1, FMAX_DUTY100); - - if (0 == duty100) { - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_MicrocodeFanControl); - return 0; - } - - tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin * duty100; - do_div(tmp64, 10000); - fdo_min = (uint16_t)tmp64; - - t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - - hwmgr->thermal_controller.advanceFanControlParameters.usTMin; - t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - - hwmgr->thermal_controller.advanceFanControlParameters.usTMed; - - pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; - pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; - - slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); - slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); - - fan_table.TempMin = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMin) / 100); - fan_table.TempMed = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMed) / 100); - fan_table.TempMax = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMax) / 100); - - fan_table.Slope1 = cpu_to_be16(slope1); - fan_table.Slope2 = cpu_to_be16(slope2); - - fan_table.FdoMin = cpu_to_be16(fdo_min); - - fan_table.HystDown = cpu_to_be16(hwmgr->thermal_controller.advanceFanControlParameters.ucTHyst); - - fan_table.HystUp = cpu_to_be16(1); - - fan_table.HystSlope = cpu_to_be16(1); - - fan_table.TempRespLim = cpu_to_be16(5); - - reference_clock = smu7_get_xclk(hwmgr); - - fan_table.RefreshPeriod = cpu_to_be32((hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay * reference_clock) / 1600); - - fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); - - fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_MULT_THERMAL_CTRL, TEMP_SEL); - - fan_table.FanControl_GL_Flag = 1; - - res = smu7_copy_bytes_to_smc(hwmgr, - smu_data->smu7_data.fan_table_start, - (uint8_t *)&fan_table, - (uint32_t)sizeof(fan_table), - SMC_RAM_END); - - return 0; -} - - -static int tonga_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr 
*)(hwmgr->backend); - - if (data->need_update_smu7_dpm_table & - (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) - return tonga_program_memory_timing_parameters(hwmgr); - - return 0; -} - -int tonga_update_sclk_threshold(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - - int result = 0; - uint32_t low_sclk_interrupt_threshold = 0; - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; - low_sclk_interrupt_threshold = - data->low_sclk_interrupt_threshold; - - CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); - - result = smu7_copy_bytes_to_smc( - hwmgr, - smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, - LowSclkInterruptThreshold), - (uint8_t *)&low_sclk_interrupt_threshold, - sizeof(uint32_t), - SMC_RAM_END); - } - - result = tonga_update_and_upload_mc_reg_table(hwmgr); - - PP_ASSERT_WITH_CODE((!result), - "Failed to upload MC reg table !", - return result); - - result = tonga_program_mem_timing_parameters(hwmgr); - PP_ASSERT_WITH_CODE((result == 0), - "Failed to program memory timing parameters !", - ); - - return result; -} - -uint32_t tonga_get_offsetof(uint32_t type, uint32_t member) -{ - switch (type) { - case SMU_SoftRegisters: - switch (member) { - case HandshakeDisables: - return offsetof(SMU72_SoftRegisters, HandshakeDisables); - case VoltageChangeTimeout: - return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout); - case AverageGraphicsActivity: - return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity); - case PreVBlankGap: - return offsetof(SMU72_SoftRegisters, PreVBlankGap); - case VBlankTimeout: - return offsetof(SMU72_SoftRegisters, VBlankTimeout); - case UcodeLoadStatus: - return offsetof(SMU72_SoftRegisters, UcodeLoadStatus); - } - case SMU_Discrete_DpmTable: - switch (member) { - case UvdBootLevel: - return offsetof(SMU72_Discrete_DpmTable, UvdBootLevel); - case VceBootLevel: - return offsetof(SMU72_Discrete_DpmTable, VceBootLevel); - case SamuBootLevel: - return offsetof(SMU72_Discrete_DpmTable, SamuBootLevel); - case LowSclkInterruptThreshold: - return offsetof(SMU72_Discrete_DpmTable, LowSclkInterruptThreshold); - } - } - pr_warn("can't get the offset of type %x member %x\n", type, member); - return 0; -} - -uint32_t tonga_get_mac_definition(uint32_t value) -{ - switch (value) { - case SMU_MAX_LEVELS_GRAPHICS: - return SMU72_MAX_LEVELS_GRAPHICS; - case SMU_MAX_LEVELS_MEMORY: - return SMU72_MAX_LEVELS_MEMORY; - case SMU_MAX_LEVELS_LINK: - return SMU72_MAX_LEVELS_LINK; - case SMU_MAX_ENTRIES_SMIO: - return SMU72_MAX_ENTRIES_SMIO; - case SMU_MAX_LEVELS_VDDC: - return SMU72_MAX_LEVELS_VDDC; - case SMU_MAX_LEVELS_VDDGFX: - return SMU72_MAX_LEVELS_VDDGFX; - case SMU_MAX_LEVELS_VDDCI: - return SMU72_MAX_LEVELS_VDDCI; - case SMU_MAX_LEVELS_MVDD: - return SMU72_MAX_LEVELS_MVDD; - } - pr_warn("can't get the mac value %x\n", value); - - return 0; -} - - -static int tonga_update_uvd_smc_table(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - smu_data->smc_state_table.UvdBootLevel = 0; 
- if (table_info->mm_dep_table->count > 0) - smu_data->smc_state_table.UvdBootLevel = - (uint8_t) (table_info->mm_dep_table->count - 1); - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, UvdBootLevel); - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0x00FFFFFF; - mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, - mm_boot_level_offset, mm_boot_level_value); - - if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_UVDDPM) || - phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_UVDDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); - return 0; -} - -static int tonga_update_vce_smc_table(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = - (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - - - smu_data->smc_state_table.VceBootLevel = - (uint8_t) (table_info->mm_dep_table->count - 1); - - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, VceBootLevel); - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFF00FFFF; - mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_VCEDPM_SetEnabledMask, - (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); - return 0; -} - -static int tonga_update_samu_smc_table(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - - smu_data->smc_state_table.SamuBootLevel = 0; - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, SamuBootLevel); - - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFFFFFF00; - mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); - return 0; -} - -int tonga_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) -{ - switch (type) { - case SMU_UVD_TABLE: - tonga_update_uvd_smc_table(hwmgr); - break; - case SMU_VCE_TABLE: - tonga_update_vce_smc_table(hwmgr); - break; - case SMU_SAMU_TABLE: - tonga_update_samu_smc_table(hwmgr); - break; - default: - break; - } - return 0; -} - - -/** - * Get the location of various tables inside the FW image. 
- * - * @param hwmgr the address of the powerplay hardware manager. - * @return always 0 - */ -int tonga_process_firmware_header(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - - uint32_t tmp; - int result; - bool error = false; - - result = smu7_read_smc_sram_dword(hwmgr, - SMU72_FIRMWARE_HEADER_LOCATION + - offsetof(SMU72_Firmware_Header, DpmTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.dpm_table_start = tmp; - - error |= (result != 0); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU72_FIRMWARE_HEADER_LOCATION + - offsetof(SMU72_Firmware_Header, SoftRegisters), - &tmp, SMC_RAM_END); - - if (!result) { - data->soft_regs_start = tmp; - smu_data->smu7_data.soft_regs_start = tmp; - } - - error |= (result != 0); - - - result = smu7_read_smc_sram_dword(hwmgr, - SMU72_FIRMWARE_HEADER_LOCATION + - offsetof(SMU72_Firmware_Header, mcRegisterTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.mc_reg_table_start = tmp; - - result = smu7_read_smc_sram_dword(hwmgr, - SMU72_FIRMWARE_HEADER_LOCATION + - offsetof(SMU72_Firmware_Header, FanTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.fan_table_start = tmp; - - error |= (result != 0); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU72_FIRMWARE_HEADER_LOCATION + - offsetof(SMU72_Firmware_Header, mcArbDramTimingTable), - &tmp, SMC_RAM_END); - - if (!result) - smu_data->smu7_data.arb_table_start = tmp; - - error |= (result != 0); - - result = smu7_read_smc_sram_dword(hwmgr, - SMU72_FIRMWARE_HEADER_LOCATION + - offsetof(SMU72_Firmware_Header, Version), - &tmp, SMC_RAM_END); - - if (!result) - hwmgr->microcode_version_info.SMC = tmp; - - error |= (result != 0); - - return error ? 
1 : 0; -} - -/*---------------------------MC----------------------------*/ - -static uint8_t tonga_get_memory_modile_index(struct pp_hwmgr *hwmgr) -{ - return (uint8_t) (0xFF & (cgs_read_register(hwmgr->device, mmBIOS_SCRATCH_4) >> 16)); -} - -static bool tonga_check_s0_mc_reg_index(uint16_t in_reg, uint16_t *out_reg) -{ - bool result = true; - - switch (in_reg) { - case mmMC_SEQ_RAS_TIMING: - *out_reg = mmMC_SEQ_RAS_TIMING_LP; - break; - - case mmMC_SEQ_DLL_STBY: - *out_reg = mmMC_SEQ_DLL_STBY_LP; - break; - - case mmMC_SEQ_G5PDX_CMD0: - *out_reg = mmMC_SEQ_G5PDX_CMD0_LP; - break; - - case mmMC_SEQ_G5PDX_CMD1: - *out_reg = mmMC_SEQ_G5PDX_CMD1_LP; - break; - - case mmMC_SEQ_G5PDX_CTRL: - *out_reg = mmMC_SEQ_G5PDX_CTRL_LP; - break; - - case mmMC_SEQ_CAS_TIMING: - *out_reg = mmMC_SEQ_CAS_TIMING_LP; - break; - - case mmMC_SEQ_MISC_TIMING: - *out_reg = mmMC_SEQ_MISC_TIMING_LP; - break; - - case mmMC_SEQ_MISC_TIMING2: - *out_reg = mmMC_SEQ_MISC_TIMING2_LP; - break; - - case mmMC_SEQ_PMG_DVS_CMD: - *out_reg = mmMC_SEQ_PMG_DVS_CMD_LP; - break; - - case mmMC_SEQ_PMG_DVS_CTL: - *out_reg = mmMC_SEQ_PMG_DVS_CTL_LP; - break; - - case mmMC_SEQ_RD_CTL_D0: - *out_reg = mmMC_SEQ_RD_CTL_D0_LP; - break; - - case mmMC_SEQ_RD_CTL_D1: - *out_reg = mmMC_SEQ_RD_CTL_D1_LP; - break; - - case mmMC_SEQ_WR_CTL_D0: - *out_reg = mmMC_SEQ_WR_CTL_D0_LP; - break; - - case mmMC_SEQ_WR_CTL_D1: - *out_reg = mmMC_SEQ_WR_CTL_D1_LP; - break; - - case mmMC_PMG_CMD_EMRS: - *out_reg = mmMC_SEQ_PMG_CMD_EMRS_LP; - break; - - case mmMC_PMG_CMD_MRS: - *out_reg = mmMC_SEQ_PMG_CMD_MRS_LP; - break; - - case mmMC_PMG_CMD_MRS1: - *out_reg = mmMC_SEQ_PMG_CMD_MRS1_LP; - break; - - case mmMC_SEQ_PMG_TIMING: - *out_reg = mmMC_SEQ_PMG_TIMING_LP; - break; - - case mmMC_PMG_CMD_MRS2: - *out_reg = mmMC_SEQ_PMG_CMD_MRS2_LP; - break; - - case mmMC_SEQ_WR_CTL_2: - *out_reg = mmMC_SEQ_WR_CTL_2_LP; - break; - - default: - result = false; - break; - } - - return result; -} - -static int tonga_set_s0_mc_reg_index(struct tonga_mc_reg_table *table) -{ - uint32_t i; - uint16_t address; - - for (i = 0; i < table->last; i++) { - table->mc_reg_address[i].s0 = - tonga_check_s0_mc_reg_index(table->mc_reg_address[i].s1, - &address) ? - address : - table->mc_reg_address[i].s1; - } - return 0; -} - -static int tonga_copy_vbios_smc_reg_table(const pp_atomctrl_mc_reg_table *table, - struct tonga_mc_reg_table *ni_table) -{ - uint8_t i, j; - - PP_ASSERT_WITH_CODE((table->last <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - PP_ASSERT_WITH_CODE((table->num_entries <= MAX_AC_TIMING_ENTRIES), - "Invalid VramInfo table.", return -EINVAL); - - for (i = 0; i < table->last; i++) - ni_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; - - ni_table->last = table->last; - - for (i = 0; i < table->num_entries; i++) { - ni_table->mc_reg_table_entry[i].mclk_max = - table->mc_reg_table_entry[i].mclk_max; - for (j = 0; j < table->last; j++) { - ni_table->mc_reg_table_entry[i].mc_data[j] = - table->mc_reg_table_entry[i].mc_data[j]; - } - } - - ni_table->num_entries = table->num_entries; - - return 0; -} - -/** - * VBIOS omits some information to reduce size, we need to recover them here. - * 1. when we see mmMC_SEQ_MISC1, bit[31:16] EMRS1, need to be write to - * mmMC_PMG_CMD_EMRS /_LP[15:0]. Bit[15:0] MRS, need to be update - * mmMC_PMG_CMD_MRS/_LP[15:0] - * 2. when we see mmMC_SEQ_RESERVE_M, bit[15:0] EMRS2, need to be write to - * mmMC_PMG_CMD_MRS1/_LP[15:0]. - * 3. 
need to set these data for each clock range - * @param hwmgr the address of the powerplay hardware manager. - * @param table the address of MCRegTable - * @return always 0 - */ -static int tonga_set_mc_special_registers(struct pp_hwmgr *hwmgr, - struct tonga_mc_reg_table *table) -{ - uint8_t i, j, k; - uint32_t temp_reg; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - for (i = 0, j = table->last; i < table->last; i++) { - PP_ASSERT_WITH_CODE((j < SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - - switch (table->mc_reg_address[i].s1) { - - case mmMC_SEQ_MISC1: - temp_reg = cgs_read_register(hwmgr->device, - mmMC_PMG_CMD_EMRS); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_EMRS; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_EMRS_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - ((temp_reg & 0xffff0000)) | - ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); - } - j++; - PP_ASSERT_WITH_CODE((j < SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - - temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (temp_reg & 0xffff0000) | - (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - - if (!data->is_memory_gddr5) - table->mc_reg_table_entry[k].mc_data[j] |= 0x100; - } - j++; - PP_ASSERT_WITH_CODE((j <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - - if (!data->is_memory_gddr5) { - table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; - table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD; - for (k = 0; k < table->num_entries; k++) - table->mc_reg_table_entry[k].mc_data[j] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; - j++; - PP_ASSERT_WITH_CODE((j <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - } - - break; - - case mmMC_SEQ_RESERVE_M: - temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS1; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS1_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (temp_reg & 0xffff0000) | - (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - } - j++; - PP_ASSERT_WITH_CODE((j <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), - "Invalid VramInfo table.", return -EINVAL); - break; - - default: - break; - } - - } - - table->last = j; - - return 0; -} - -static int tonga_set_valid_flag(struct tonga_mc_reg_table *table) -{ - uint8_t i, j; - - for (i = 0; i < table->last; i++) { - for (j = 1; j < table->num_entries; j++) { - if (table->mc_reg_table_entry[j-1].mc_data[i] != - table->mc_reg_table_entry[j].mc_data[i]) { - table->validflag |= (1<<i); - break; - } - } - } - - return 0; -} - -int tonga_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) -{ - int result; - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - pp_atomctrl_mc_reg_table *table; - struct tonga_mc_reg_table *ni_table = &smu_data->mc_reg_table; - uint8_t module_index = tonga_get_memory_modile_index(hwmgr); - - table = kzalloc(sizeof(pp_atomctrl_mc_reg_table), GFP_KERNEL); - - if (table == NULL) - return -ENOMEM; - - /* Program additional LP registers that are no longer programmed by VBIOS */ - 
cgs_write_register(hwmgr->device, mmMC_SEQ_RAS_TIMING_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_RAS_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_CAS_TIMING_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_CAS_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_DLL_STBY_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_DLL_STBY)); - cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0)); - cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL)); - cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_EMRS_LP, - cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS_LP, - cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS1_LP, - cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0)); - cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0)); - cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_TIMING_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_TIMING)); - cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS2_LP, - cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS2)); - cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP, - cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2)); - - memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table)); - - result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table); - - if (!result) - result = tonga_copy_vbios_smc_reg_table(table, ni_table); - - if (!result) { - tonga_set_s0_mc_reg_index(ni_table); - result = tonga_set_mc_special_registers(hwmgr, ni_table); - } - - if (!result) - tonga_set_valid_flag(ni_table); - - kfree(table); - - return result; -} - -bool tonga_is_dpm_running(struct pp_hwmgr *hwmgr) -{ - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? 
true : false; -} - -int tonga_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, - struct amd_pp_profile *request) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *) - (hwmgr->smu_backend); - struct SMU72_Discrete_GraphicsLevel *levels = - smu_data->smc_state_table.GraphicsLevel; - uint32_t array = smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, GraphicsLevel); - uint32_t array_size = sizeof(struct SMU72_Discrete_GraphicsLevel) * - SMU72_MAX_LEVELS_GRAPHICS; - uint32_t i; - - for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { - levels[i].ActivityLevel = - cpu_to_be16(request->activity_threshold); - levels[i].EnabledForActivity = 1; - levels[i].UpHyst = request->up_hyst; - levels[i].DownHyst = request->down_hyst; - } - - return smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, - array_size, SMC_RAM_END); -} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.h b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.h deleted file mode 100644 index 9d6a78a65976..000000000000 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ -#ifndef _TONGA_SMC_H -#define _TONGA_SMC_H - -#include "smumgr.h" -#include "smu72.h" - - -#define ASICID_IS_TONGA_P(wDID, bRID) \ - (((wDID == 0x6930) && ((bRID == 0xF0) || (bRID == 0xF1) || (bRID == 0xFF))) \ - || ((wDID == 0x6920) && ((bRID == 0) || (bRID == 1)))) - - -struct tonga_pt_defaults { - uint8_t svi_load_line_en; - uint8_t svi_load_line_vddC; - uint8_t tdc_vddc_throttle_release_limit_perc; - uint8_t tdc_mawt; - uint8_t tdc_waterfall_ctl; - uint8_t dte_ambient_temp_base; - uint32_t display_cac; - uint32_t bapm_temp_gradient; - uint16_t bapmti_r[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS]; - uint16_t bapmti_rc[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS]; -}; - -int tonga_populate_all_graphic_levels(struct pp_hwmgr *hwmgr); -int tonga_populate_all_memory_levels(struct pp_hwmgr *hwmgr); -int tonga_init_smc_table(struct pp_hwmgr *hwmgr); -int tonga_thermal_setup_fan_table(struct pp_hwmgr *hwmgr); -int tonga_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type); -int tonga_update_sclk_threshold(struct pp_hwmgr *hwmgr); -uint32_t tonga_get_offsetof(uint32_t type, uint32_t member); -uint32_t tonga_get_mac_definition(uint32_t value); -int tonga_process_firmware_header(struct pp_hwmgr *hwmgr); -int tonga_initialize_mc_reg_table(struct pp_hwmgr *hwmgr); -bool tonga_is_dpm_running(struct pp_hwmgr *hwmgr); -int tonga_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, - struct amd_pp_profile *request); -#endif - diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index d22cf218cf18..0a8e48bff219 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -33,9 +33,69 @@ #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" #include "cgs_common.h" -#include "tonga_smc.h" #include "smu7_smumgr.h" +#include "smu7_dyn_defaults.h" + +#include "smu7_hwmgr.h" +#include "hardwaremanager.h" +#include "ppatomctrl.h" + +#include "atombios.h" + +#include "pppcielanes.h" +#include "pp_endian.h" + +#include "gmc/gmc_8_1_d.h" +#include "gmc/gmc_8_1_sh_mask.h" + +#include "bif/bif_5_0_d.h" +#include "bif/bif_5_0_sh_mask.h" + +#include "dce/dce_10_0_d.h" +#include "dce/dce_10_0_sh_mask.h" + + +#define VOLTAGE_SCALE 4 +#define POWERTUNE_DEFAULT_SET_MAX 1 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 +#define MC_CG_ARB_FREQ_F1 0x0b +#define VDDC_VDDCI_DELTA 200 + + +static const struct tonga_pt_defaults tonga_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { +/* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT + */ + {1, 0xF, 0xFD, 0x19, + 5, 45, 0, 0xB0000, + {0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, + 0xC9, 0xC9, 0x2F, 0x4D, 0x61}, + {0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, + 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4} + }, +}; + +/* [Fmin, Fmax, LDO_REFSEL, USE_FOR_LOW_FREQ] */ +static const uint16_t tonga_clock_stretcher_lookup_table[2][4] = { + {600, 1050, 3, 0}, + {600, 1050, 6, 1} +}; + +/* [FF, SS] type, [] 4 voltage ranges, + * and [Floor Freq, Boundary Freq, VID min , VID max] + */ +static const uint32_t tonga_clock_stretcher_ddt_table[2][4][4] = { + { {265, 529, 120, 128}, {325, 650, 96, 119}, {430, 860, 32, 95}, {0, 0, 0, 31} }, + { {275, 550, 104, 112}, {319, 638, 96, 103}, {360, 720, 64, 95}, {384, 768, 32, 63} } +}; + +/* 
[Use_For_Low_freq] value, [0%, 5%, 10%, 7.14%, 14.28%, 20%] */ +static const uint8_t tonga_clock_stretch_amount_conversion[2][6] = { + {0, 1, 3, 2, 4, 5}, + {0, 2, 4, 5, 6, 5} +}; static int tonga_start_in_protection_mode(struct pp_hwmgr *hwmgr) { @@ -95,7 +155,6 @@ static int tonga_start_in_protection_mode(struct pp_hwmgr *hwmgr) return 0; } - static int tonga_start_in_non_protection_mode(struct pp_hwmgr *hwmgr) { int result = 0; @@ -160,13 +219,6 @@ static int tonga_start_smu(struct pp_hwmgr *hwmgr) return result; } -/** - * Write a 32bit value to the SMC SRAM space. - * ALL PARAMETERS ARE IN HOST BYTE ORDER. - * @param smumgr the address of the powerplay hardware manager. - * @param smcAddress the address in the SMC RAM to access. - * @param value to write to the SMC SRAM. - */ static int tonga_smu_init(struct pp_hwmgr *hwmgr) { struct tonga_smumgr *tonga_priv = NULL; @@ -187,6 +239,3053 @@ static int tonga_smu_init(struct pp_hwmgr *hwmgr) return 0; } + +static int tonga_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, + phm_ppt_v1_clock_voltage_dependency_table *allowed_clock_voltage_table, + uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd) +{ + uint32_t i = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + /* clock - voltage dependency table is empty table */ + if (allowed_clock_voltage_table->count == 0) + return -EINVAL; + + for (i = 0; i < allowed_clock_voltage_table->count; i++) { + /* find first sclk bigger than request */ + if (allowed_clock_voltage_table->entries[i].clk >= clock) { + voltage->VddGfx = phm_get_voltage_index( + pptable_info->vddgfx_lookup_table, + allowed_clock_voltage_table->entries[i].vddgfx); + voltage->Vddc = phm_get_voltage_index( + pptable_info->vddc_lookup_table, + allowed_clock_voltage_table->entries[i].vddc); + + if (allowed_clock_voltage_table->entries[i].vddci) + voltage->Vddci = + phm_get_voltage_id(&data->vddci_voltage_table, allowed_clock_voltage_table->entries[i].vddci); + else + voltage->Vddci = + phm_get_voltage_id(&data->vddci_voltage_table, + allowed_clock_voltage_table->entries[i].vddc - VDDC_VDDCI_DELTA); + + + if (allowed_clock_voltage_table->entries[i].mvdd) + *mvdd = (uint32_t) allowed_clock_voltage_table->entries[i].mvdd; + + voltage->Phases = 1; + return 0; + } + } + + /* sclk is bigger than max sclk in the dependence table */ + voltage->VddGfx = phm_get_voltage_index(pptable_info->vddgfx_lookup_table, + allowed_clock_voltage_table->entries[i-1].vddgfx); + voltage->Vddc = phm_get_voltage_index(pptable_info->vddc_lookup_table, + allowed_clock_voltage_table->entries[i-1].vddc); + + if (allowed_clock_voltage_table->entries[i-1].vddci) + voltage->Vddci = phm_get_voltage_id(&data->vddci_voltage_table, + allowed_clock_voltage_table->entries[i-1].vddci); + + if (allowed_clock_voltage_table->entries[i-1].mvdd) + *mvdd = (uint32_t) allowed_clock_voltage_table->entries[i-1].mvdd; + + return 0; +} + +static int tonga_populate_smc_vddc_table(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + unsigned int count; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + table->VddcLevelCount = data->vddc_voltage_table.count; + for (count = 0; count < table->VddcLevelCount; count++) { + table->VddcTable[count] = + PP_HOST_TO_SMC_US(data->vddc_voltage_table.entries[count].value * VOLTAGE_SCALE); + } + 
CONVERT_FROM_HOST_TO_SMC_UL(table->VddcLevelCount); + } + return 0; +} + +static int tonga_populate_smc_vdd_gfx_table(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + unsigned int count; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vdd_gfx_control) { + table->VddGfxLevelCount = data->vddgfx_voltage_table.count; + for (count = 0; count < data->vddgfx_voltage_table.count; count++) { + table->VddGfxTable[count] = + PP_HOST_TO_SMC_US(data->vddgfx_voltage_table.entries[count].value * VOLTAGE_SCALE); + } + CONVERT_FROM_HOST_TO_SMC_UL(table->VddGfxLevelCount); + } + return 0; +} + +static int tonga_populate_smc_vdd_ci_table(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t count; + + table->VddciLevelCount = data->vddci_voltage_table.count; + for (count = 0; count < table->VddciLevelCount; count++) { + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { + table->VddciTable[count] = + PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[count].value * VOLTAGE_SCALE); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + table->SmioTable1.Pattern[count].Voltage = + PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[count].value * VOLTAGE_SCALE); + /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level. */ + table->SmioTable1.Pattern[count].Smio = + (uint8_t) count; + table->Smio[count] |= + data->vddci_voltage_table.entries[count].smio_low; + table->VddciTable[count] = + PP_HOST_TO_SMC_US(data->vddci_voltage_table.entries[count].value * VOLTAGE_SCALE); + } + } + + table->SmioMask1 = data->vddci_voltage_table.mask_low; + CONVERT_FROM_HOST_TO_SMC_UL(table->VddciLevelCount); + + return 0; +} + +static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t count; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + table->MvddLevelCount = data->mvdd_voltage_table.count; + for (count = 0; count < table->MvddLevelCount; count++) { + table->SmioTable2.Pattern[count].Voltage = + PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ + table->SmioTable2.Pattern[count].Smio = + (uint8_t) count; + table->Smio[count] |= + data->mvdd_voltage_table.entries[count].smio_low; + } + table->SmioMask2 = data->mvdd_voltage_table.mask_low; + + CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); + } + + return 0; +} + +static int tonga_populate_cac_tables(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + uint32_t count; + uint8_t index = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_voltage_lookup_table *vddgfx_lookup_table = + pptable_info->vddgfx_lookup_table; + struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table = + pptable_info->vddc_lookup_table; + + /* table is already swapped, so in order to use the value from it + * we need to swap it back. 
+ */ + uint32_t vddc_level_count = PP_SMC_TO_HOST_UL(table->VddcLevelCount); + uint32_t vddgfx_level_count = PP_SMC_TO_HOST_UL(table->VddGfxLevelCount); + + for (count = 0; count < vddc_level_count; count++) { + /* We are populating vddc CAC data to BapmVddc table in split and merged mode */ + index = phm_get_voltage_index(vddc_lookup_table, + data->vddc_voltage_table.entries[count].value); + table->BapmVddcVidLoSidd[count] = + convert_to_vid(vddc_lookup_table->entries[index].us_cac_low); + table->BapmVddcVidHiSidd[count] = + convert_to_vid(vddc_lookup_table->entries[index].us_cac_mid); + table->BapmVddcVidHiSidd2[count] = + convert_to_vid(vddc_lookup_table->entries[index].us_cac_high); + } + + if ((data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2)) { + /* We are populating vddgfx CAC data to BapmVddgfx table in split mode */ + for (count = 0; count < vddgfx_level_count; count++) { + index = phm_get_voltage_index(vddgfx_lookup_table, + convert_to_vid(vddgfx_lookup_table->entries[index].us_cac_mid)); + table->BapmVddGfxVidHiSidd2[count] = + convert_to_vid(vddgfx_lookup_table->entries[index].us_cac_high); + } + } else { + for (count = 0; count < vddc_level_count; count++) { + index = phm_get_voltage_index(vddc_lookup_table, + data->vddc_voltage_table.entries[count].value); + table->BapmVddGfxVidLoSidd[count] = + convert_to_vid(vddc_lookup_table->entries[index].us_cac_low); + table->BapmVddGfxVidHiSidd[count] = + convert_to_vid(vddc_lookup_table->entries[index].us_cac_mid); + table->BapmVddGfxVidHiSidd2[count] = + convert_to_vid(vddc_lookup_table->entries[index].us_cac_high); + } + } + + return 0; +} + +static int tonga_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + int result; + + result = tonga_populate_smc_vddc_table(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "can not populate VDDC voltage table to SMC", + return -EINVAL); + + result = tonga_populate_smc_vdd_ci_table(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "can not populate VDDCI voltage table to SMC", + return -EINVAL); + + result = tonga_populate_smc_vdd_gfx_table(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "can not populate VDDGFX voltage table to SMC", + return -EINVAL); + + result = tonga_populate_smc_mvdd_table(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "can not populate MVDD voltage table to SMC", + return -EINVAL); + + result = tonga_populate_cac_tables(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "can not populate CAC voltage tables to SMC", + return -EINVAL); + + return 0; +} + +static int tonga_populate_ulv_level(struct pp_hwmgr *hwmgr, + struct SMU72_Discrete_Ulv *state) +{ + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; + state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * + VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + + state->VddcPhase = 1; + + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); + + return 0; +} + +static int tonga_populate_ulv_state(struct pp_hwmgr *hwmgr, + struct SMU72_Discrete_DpmTable *table) +{ + return tonga_populate_ulv_level(hwmgr, &table->Ulv); +} + +static int tonga_populate_smc_link_level(struct pp_hwmgr *hwmgr, SMU72_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct 
smu7_dpm_table *dpm_table = &data->dpm_table; + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + uint32_t i; + + /* Index (dpm_table->pcie_speed_table.count) is reserved for PCIE boot level. */ + for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = + (uint8_t)encode_pcie_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = + 1; + table->LinkLevel[i].SPC = + (uint8_t)(data->pcie_spc_cap & 0xff); + table->LinkLevel[i].DownThreshold = + PP_HOST_TO_SMC_UL(5); + table->LinkLevel[i].UpThreshold = + PP_HOST_TO_SMC_UL(30); + } + + smu_data->smc_state_table.LinkLevelCount = + (uint8_t)dpm_table->pcie_speed_table.count; + data->dpm_level_enable_mask.pcie_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); + + return 0; +} + +static int tonga_calculate_sclk_params(struct pp_hwmgr *hwmgr, + uint32_t engine_clock, SMU72_Discrete_GraphicsLevel *sclk) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + pp_atomctrl_clock_dividers_vi dividers; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + uint32_t spll_func_cntl_4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; + uint32_t cg_spll_spread_spectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + uint32_t cg_spll_spread_spectrum_2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + uint32_t reference_clock; + uint32_t reference_divider; + uint32_t fbdiv; + int result; + + /* get the engine clock dividers for this clock value*/ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, engine_clock, &dividers); + + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", return result); + + /* To get FBDIV we need to multiply this by 16384 and divide it by Fref.*/ + reference_clock = atomctrl_get_reference_clock(hwmgr); + + reference_divider = 1 + dividers.uc_pll_ref_div; + + /* low 14 bits is fraction and high 12 bits is divider*/ + fbdiv = dividers.ul_fb_div.ul_fb_divider & 0x3FFFFFF; + + /* SPLL_FUNC_CNTL setup*/ + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_REF_DIV, dividers.uc_pll_ref_div); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_PDIV_A, dividers.uc_pll_post_div); + + /* SPLL_FUNC_CNTL_3 setup*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, + CG_SPLL_FUNC_CNTL_3, SPLL_FB_DIV, fbdiv); + + /* set to use fractional accumulation*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, + CG_SPLL_FUNC_CNTL_3, SPLL_DITHEN, 1); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EngineSpreadSpectrumSupport)) { + pp_atomctrl_internal_ss_info ss_info; + + uint32_t vcoFreq = engine_clock * dividers.uc_pll_post_div; + if (0 == atomctrl_get_engine_clock_spread_spectrum(hwmgr, vcoFreq, &ss_info)) { + /* + * ss_info.speed_spectrum_percentage -- in unit of 0.01% + * ss_info.speed_spectrum_rate -- in unit of khz + */ + /* clks = reference_clock * 10 / (REFDIV + 1) / speed_spectrum_rate / 2 */ + uint32_t clkS = reference_clock * 5 / (reference_divider * ss_info.speed_spectrum_rate); + + /* clkv = 2 * D * fbdiv / NS */ + uint32_t clkV = 4 * ss_info.speed_spectrum_percentage * fbdiv / (clkS * 10000); + + cg_spll_spread_spectrum = + PHM_SET_FIELD(cg_spll_spread_spectrum,
CG_SPLL_SPREAD_SPECTRUM, CLKS, clkS); + cg_spll_spread_spectrum = + PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, SSEN, 1); + cg_spll_spread_spectrum_2 = + PHM_SET_FIELD(cg_spll_spread_spectrum_2, CG_SPLL_SPREAD_SPECTRUM_2, CLKV, clkV); + } + } + + sclk->SclkFrequency = engine_clock; + sclk->CgSpllFuncCntl3 = spll_func_cntl_3; + sclk->CgSpllFuncCntl4 = spll_func_cntl_4; + sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; + sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; + sclk->SclkDid = (uint8_t)dividers.pll_post_divider; + + return 0; +} + +static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, + uint32_t engine_clock, + uint16_t sclk_activity_level_threshold, + SMU72_Discrete_GraphicsLevel *graphic_level) +{ + int result; + uint32_t mvdd; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + result = tonga_calculate_sclk_params(hwmgr, engine_clock, graphic_level); + + /* populate graphics levels*/ + result = tonga_get_dependency_volt_by_clk(hwmgr, + pptable_info->vdd_dep_on_sclk, engine_clock, + &graphic_level->MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((!result), + "can not find VDDC voltage value for VDDC " + "engine clock dependency table", return result); + + /* SCLK frequency in units of 10KHz*/ + graphic_level->SclkFrequency = engine_clock; + /* Indicates maximum activity level for this performance level. 50% for now*/ + graphic_level->ActivityLevel = sclk_activity_level_threshold; + + graphic_level->CcPwrDynRm = 0; + graphic_level->CcPwrDynRm1 = 0; + /* this level can be used if activity is high enough.*/ + graphic_level->EnabledForActivity = 0; + /* this level can be used for throttling.*/ + graphic_level->EnabledForThrottle = 1; + graphic_level->UpHyst = 0; + graphic_level->DownHyst = 0; + graphic_level->VoltageDownHyst = 0; + graphic_level->PowerThrottle = 0; + + data->display_timing.min_clock_in_sr = + hwmgr->display_config.min_core_set_clock_in_sr; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleep)) + graphic_level->DeepSleepDivId = + smu7_get_sleep_divider_id_from_clock(engine_clock, + data->display_timing.min_clock_in_sr); + + /* Default to slow, highest DPM level will be set to PPSMC_DISPLAY_WATERMARK_LOW later.*/ + graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + if (!result) { + /* CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVoltage);*/ + /* CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVddcPhases);*/ + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(graphic_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm1); + } + + return result; +} + +static int tonga_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + struct phm_ppt_v1_pcie_table *pcie_table = 
pptable_info->pcie_table; + uint8_t pcie_entry_count = (uint8_t) data->dpm_table.pcie_speed_table.count; + uint32_t level_array_address = smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, GraphicsLevel); + + uint32_t level_array_size = sizeof(SMU72_Discrete_GraphicsLevel) * + SMU72_MAX_LEVELS_GRAPHICS; + + SMU72_Discrete_GraphicsLevel *levels = smu_data->smc_state_table.GraphicsLevel; + + uint32_t i, max_entry; + uint8_t highest_pcie_level_enabled = 0; + uint8_t lowest_pcie_level_enabled = 0, mid_pcie_level_enabled = 0; + uint8_t count = 0; + int result = 0; + + memset(levels, 0x00, level_array_size); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + result = tonga_populate_single_graphic_level(hwmgr, + dpm_table->sclk_table.dpm_levels[i].value, + (uint16_t)smu_data->activity_target[i], + &(smu_data->smc_state_table.GraphicsLevel[i])); + if (result != 0) + return result; + + /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ + if (i > 1) + smu_data->smc_state_table.GraphicsLevel[i].DeepSleepDivId = 0; + } + + /* Only enable level 0 for now. */ + smu_data->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1; + + /* set highest level watermark to high */ + if (dpm_table->sclk_table.count > 1) + smu_data->smc_state_table.GraphicsLevel[dpm_table->sclk_table.count-1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + smu_data->smc_state_table.GraphicsDpmLevelCount = + (uint8_t)dpm_table->sclk_table.count; + data->dpm_level_enable_mask.sclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + if (pcie_table != NULL) { + PP_ASSERT_WITH_CODE((pcie_entry_count >= 1), + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + max_entry = pcie_entry_count - 1; /* for indexing, we need to decrement by 1.*/ + for (i = 0; i < dpm_table->sclk_table.count; i++) { + smu_data->smc_state_table.GraphicsLevel[i].pcieDpmLevel = + (uint8_t) ((i < max_entry) ? i : max_entry); + } + } else { + if (0 == data->dpm_level_enable_mask.pcie_dpm_enable_mask) + pr_err("Pcie Dpm Enablemask is 0 !"); + + while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1<<(highest_pcie_level_enabled+1))) != 0)) { + highest_pcie_level_enabled++; + } + + while (data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1<<lowest_pcie_level_enabled)) == 0)) { + lowest_pcie_level_enabled++; + } + + while ((count < highest_pcie_level_enabled) && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1<<(lowest_pcie_level_enabled+1+count))) == 0)) { + count++; + } + mid_pcie_level_enabled = (lowest_pcie_level_enabled+1+count) < highest_pcie_level_enabled ? 
+ (lowest_pcie_level_enabled+1+count) : highest_pcie_level_enabled; + + + /* set pcieDpmLevel to highest_pcie_level_enabled*/ + for (i = 2; i < dpm_table->sclk_table.count; i++) + smu_data->smc_state_table.GraphicsLevel[i].pcieDpmLevel = highest_pcie_level_enabled; + + /* set pcieDpmLevel to lowest_pcie_level_enabled*/ + smu_data->smc_state_table.GraphicsLevel[0].pcieDpmLevel = lowest_pcie_level_enabled; + + /* set pcieDpmLevel to mid_pcie_level_enabled*/ + smu_data->smc_state_table.GraphicsLevel[1].pcieDpmLevel = mid_pcie_level_enabled; + } + /* level count will send to smc once at init smc table and never change*/ + result = smu7_copy_bytes_to_smc(hwmgr, level_array_address, + (uint8_t *)levels, (uint32_t)level_array_size, + SMC_RAM_END); + + return result; +} + +static int tonga_calculate_mclk_params( + struct pp_hwmgr *hwmgr, + uint32_t memory_clock, + SMU72_Discrete_MemoryLevel *mclk, + bool strobe_mode, + bool dllStateOn + ) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; + uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; + uint32_t mpll_ad_func_cntl = data->clock_registers.vMPLL_AD_FUNC_CNTL; + uint32_t mpll_dq_func_cntl = data->clock_registers.vMPLL_DQ_FUNC_CNTL; + uint32_t mpll_func_cntl = data->clock_registers.vMPLL_FUNC_CNTL; + uint32_t mpll_func_cntl_1 = data->clock_registers.vMPLL_FUNC_CNTL_1; + uint32_t mpll_func_cntl_2 = data->clock_registers.vMPLL_FUNC_CNTL_2; + uint32_t mpll_ss1 = data->clock_registers.vMPLL_SS1; + uint32_t mpll_ss2 = data->clock_registers.vMPLL_SS2; + + pp_atomctrl_memory_clock_param mpll_param; + int result; + + result = atomctrl_get_memory_pll_dividers_si(hwmgr, + memory_clock, &mpll_param, strobe_mode); + PP_ASSERT_WITH_CODE( + !result, + "Error retrieving Memory Clock Parameters from VBIOS.", + return result); + + /* MPLL_FUNC_CNTL setup*/ + mpll_func_cntl = PHM_SET_FIELD(mpll_func_cntl, MPLL_FUNC_CNTL, BWCTRL, + mpll_param.bw_ctrl); + + /* MPLL_FUNC_CNTL_1 setup*/ + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, CLKF, + mpll_param.mpll_fb_divider.cl_kf); + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, CLKFRAC, + mpll_param.mpll_fb_divider.clk_frac); + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, VCO_MODE, + mpll_param.vco_mode); + + /* MPLL_AD_FUNC_CNTL setup*/ + mpll_ad_func_cntl = PHM_SET_FIELD(mpll_ad_func_cntl, + MPLL_AD_FUNC_CNTL, YCLK_POST_DIV, + mpll_param.mpll_post_divider); + + if (data->is_memory_gddr5) { + /* MPLL_DQ_FUNC_CNTL setup*/ + mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, + MPLL_DQ_FUNC_CNTL, YCLK_SEL, + mpll_param.yclk_sel); + mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, + MPLL_DQ_FUNC_CNTL, YCLK_POST_DIV, + mpll_param.mpll_post_divider); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MemorySpreadSpectrumSupport)) { + /* + ************************************ + Fref = Reference Frequency + NF = Feedback divider ratio + NR = Reference divider ratio + Fnom = Nominal VCO output frequency = Fref * NF / NR + Fs = Spreading Rate + D = Percentage down-spread / 2 + Fint = Reference input frequency to PFD = Fref / NR + NS = Spreading rate divider ratio = int(Fint / (2 * Fs)) + CLKS = NS - 1 = ISS_STEP_NUM[11:0] + NV = D * Fs / Fnom * 4 * ((Fnom/Fref * NR) ^ 2) + CLKV = 65536 * NV = ISS_STEP_SIZE[25:0] + ************************************* + */ + pp_atomctrl_internal_ss_info ss_info; + uint32_t freq_nom; + 
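(Not part of the patch: an illustrative aside.) The spread-spectrum comment above gives the CLKS/CLKV definitions that the integer math just below implements. A minimal, self-contained C sketch of that arithmetic, with the function and parameter names invented here and the units taken from the surrounding comments (clocks in 10 kHz, percentage in 0.01 %, rate in kHz, reference divider assumed to be 1):

#include <stdint.h>

/* Illustrative sketch only; mirrors the CLKS/CLKV computation below. */
static void mpll_ss_sketch(uint32_t freq_nom, uint32_t reference_clock,
                           uint32_t ss_percentage, uint32_t ss_rate,
                           uint32_t *clks, uint32_t *clkv)
{
        /* (Fnom / Fref)^2, with the "S.I. reference_divider = 1" assumption */
        uint32_t tmp = freq_nom / reference_clock;

        tmp = tmp * tmp;
        /* CLKS = reference_clock * 5 / speed_spectrum_rate */
        *clks = reference_clock * 5 / ss_rate;
        /* CLKV = 131 * percentage * rate / 100 * (Fnom / Fref)^2 / Fnom */
        *clkv = (((131 * ss_percentage * ss_rate) / 100) * tmp) / freq_nom;
}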
uint32_t tmp; + uint32_t reference_clock = atomctrl_get_mpll_reference_clock(hwmgr); + + /* for GDDR5 for all modes and DDR3 */ + if (1 == mpll_param.qdr) + freq_nom = memory_clock * 4 * (1 << mpll_param.mpll_post_divider); + else + freq_nom = memory_clock * 2 * (1 << mpll_param.mpll_post_divider); + + /* tmp = (freq_nom / reference_clock * reference_divider) ^ 2 Note: S.I. reference_divider = 1*/ + tmp = (freq_nom / reference_clock); + tmp = tmp * tmp; + + if (0 == atomctrl_get_memory_clock_spread_spectrum(hwmgr, freq_nom, &ss_info)) { + /* ss_info.speed_spectrum_percentage -- in unit of 0.01% */ + /* ss.Info.speed_spectrum_rate -- in unit of khz */ + /* CLKS = reference_clock / (2 * speed_spectrum_rate * reference_divider) * 10 */ + /* = reference_clock * 5 / speed_spectrum_rate */ + uint32_t clks = reference_clock * 5 / ss_info.speed_spectrum_rate; + + /* CLKV = 65536 * speed_spectrum_percentage / 2 * spreadSpecrumRate / freq_nom * 4 / 100000 * ((freq_nom / reference_clock) ^ 2) */ + /* = 131 * speed_spectrum_percentage * speed_spectrum_rate / 100 * ((freq_nom / reference_clock) ^ 2) / freq_nom */ + uint32_t clkv = + (uint32_t)((((131 * ss_info.speed_spectrum_percentage * + ss_info.speed_spectrum_rate) / 100) * tmp) / freq_nom); + + mpll_ss1 = PHM_SET_FIELD(mpll_ss1, MPLL_SS1, CLKV, clkv); + mpll_ss2 = PHM_SET_FIELD(mpll_ss2, MPLL_SS2, CLKS, clks); + } + } + + /* MCLK_PWRMGT_CNTL setup */ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, DLL_SPEED, mpll_param.dll_speed); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_PDNB, dllStateOn); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_PDNB, dllStateOn); + + /* Save the result data to outpupt memory level structure */ + mclk->MclkFrequency = memory_clock; + mclk->MpllFuncCntl = mpll_func_cntl; + mclk->MpllFuncCntl_1 = mpll_func_cntl_1; + mclk->MpllFuncCntl_2 = mpll_func_cntl_2; + mclk->MpllAdFuncCntl = mpll_ad_func_cntl; + mclk->MpllDqFuncCntl = mpll_dq_func_cntl; + mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; + mclk->DllCntl = dll_cntl; + mclk->MpllSs1 = mpll_ss1; + mclk->MpllSs2 = mpll_ss2; + + return 0; +} + +static uint8_t tonga_get_mclk_frequency_ratio(uint32_t memory_clock, + bool strobe_mode) +{ + uint8_t mc_para_index; + + if (strobe_mode) { + if (memory_clock < 12500) + mc_para_index = 0x00; + else if (memory_clock > 47500) + mc_para_index = 0x0f; + else + mc_para_index = (uint8_t)((memory_clock - 10000) / 2500); + } else { + if (memory_clock < 65000) + mc_para_index = 0x00; + else if (memory_clock > 135000) + mc_para_index = 0x0f; + else + mc_para_index = (uint8_t)((memory_clock - 60000) / 5000); + } + + return mc_para_index; +} + +static uint8_t tonga_get_ddr3_mclk_frequency_ratio(uint32_t memory_clock) +{ + uint8_t mc_para_index; + + if (memory_clock < 10000) + mc_para_index = 0; + else if (memory_clock >= 80000) + mc_para_index = 0x0f; + else + mc_para_index = (uint8_t)((memory_clock - 10000) / 5000 + 1); + + return mc_para_index; +} + + +static int tonga_populate_single_memory_level( + struct pp_hwmgr *hwmgr, + uint32_t memory_clock, + SMU72_Discrete_MemoryLevel *memory_level + ) +{ + uint32_t mvdd = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + int result = 0; + bool dll_state_on; + struct cgs_display_info info = {0}; + uint32_t mclk_edc_wr_enable_threshold = 40000; + uint32_t mclk_stutter_mode_threshold = 30000; + 
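(Not part of the patch: an illustrative aside.) The tonga_get_mclk_frequency_ratio()/tonga_get_ddr3_mclk_frequency_ratio() helpers above and the thresholds declared here all use the driver's 10 kHz clock units, so mclk_stutter_mode_threshold = 30000 corresponds to 300 MHz and the 40000 thresholds to 400 MHz. A worked example of the non-strobe GDDR5 index mapping, with the function name invented here:

#include <stdint.h>

/* Illustrative sketch only; same clamping and scaling as the helper above. */
static uint8_t mclk_ratio_example(void)
{
        uint32_t memory_clock = 96000;  /* 960 MHz in 10 kHz units */

        if (memory_clock < 65000)
                return 0x00;
        if (memory_clock > 135000)
                return 0x0f;
        return (uint8_t)((memory_clock - 60000) / 5000);  /* (96000 - 60000) / 5000 = 7 */
}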
uint32_t mclk_edc_enable_threshold = 40000; + uint32_t mclk_strobe_mode_threshold = 40000; + + if (NULL != pptable_info->vdd_dep_on_mclk) { + result = tonga_get_dependency_volt_by_clk(hwmgr, + pptable_info->vdd_dep_on_mclk, + memory_clock, + &memory_level->MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE( + !result, + "can not find MinVddc voltage value from memory VDDC " + "voltage dependency table", + return result); + } + + if (data->mvdd_control == SMU7_VOLTAGE_CONTROL_NONE) + memory_level->MinMvdd = data->vbios_boot_state.mvdd_bootup_value; + else + memory_level->MinMvdd = mvdd; + + memory_level->EnabledForThrottle = 1; + memory_level->EnabledForActivity = 0; + memory_level->UpHyst = 0; + memory_level->DownHyst = 100; + memory_level->VoltageDownHyst = 0; + + /* Indicates maximum activity level for this performance level.*/ + memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + memory_level->StutterEnable = 0; + memory_level->StrobeEnable = 0; + memory_level->EdcReadEnable = 0; + memory_level->EdcWriteEnable = 0; + memory_level->RttEnable = 0; + + /* default set to low watermark. Highest level will be set to high later.*/ + memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + cgs_get_active_displays_info(hwmgr->device, &info); + data->display_timing.num_existing_displays = info.display_count; + + if ((mclk_stutter_mode_threshold != 0) && + (memory_clock <= mclk_stutter_mode_threshold) && + (!data->is_uvd_enabled) + && (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, STUTTER_ENABLE) & 0x1) + && (data->display_timing.num_existing_displays <= 2) + && (data->display_timing.num_existing_displays != 0)) + memory_level->StutterEnable = 1; + + /* decide strobe mode*/ + memory_level->StrobeEnable = (mclk_strobe_mode_threshold != 0) && + (memory_clock <= mclk_strobe_mode_threshold); + + /* decide EDC mode and memory clock ratio*/ + if (data->is_memory_gddr5) { + memory_level->StrobeRatio = tonga_get_mclk_frequency_ratio(memory_clock, + memory_level->StrobeEnable); + + if ((mclk_edc_enable_threshold != 0) && + (memory_clock > mclk_edc_enable_threshold)) { + memory_level->EdcReadEnable = 1; + } + + if ((mclk_edc_wr_enable_threshold != 0) && + (memory_clock > mclk_edc_wr_enable_threshold)) { + memory_level->EdcWriteEnable = 1; + } + + if (memory_level->StrobeEnable) { + if (tonga_get_mclk_frequency_ratio(memory_clock, 1) >= + ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC7) >> 16) & 0xf)) { + dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 1 : 0; + } else { + dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC6) >> 1) & 0x1) ? 1 : 0; + } + + } else { + dll_state_on = data->dll_default_on; + } + } else { + memory_level->StrobeRatio = + tonga_get_ddr3_mclk_frequency_ratio(memory_clock); + dll_state_on = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 
1 : 0; + } + + result = tonga_calculate_mclk_params(hwmgr, + memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on); + + if (!result) { + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MinMvdd); + /* MCLK frequency in units of 10KHz*/ + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkFrequency); + /* Indicates maximum activity level for this performance level.*/ + CONVERT_FROM_HOST_TO_SMC_US(memory_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_1); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_2); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllAdFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllDqFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkPwrmgtCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->DllCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs1); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs2); + } + + return result; +} + +int tonga_populate_all_memory_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + int result; + + /* populate MCLK dpm table to SMU7 */ + uint32_t level_array_address = + smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, MemoryLevel); + uint32_t level_array_size = + sizeof(SMU72_Discrete_MemoryLevel) * + SMU72_MAX_LEVELS_MEMORY; + SMU72_Discrete_MemoryLevel *levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + + memset(levels, 0x00, level_array_size); + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), + "can not populate memory level as memory clock is zero", + return -EINVAL); + result = tonga_populate_single_memory_level( + hwmgr, + dpm_table->mclk_table.dpm_levels[i].value, + &(smu_data->smc_state_table.MemoryLevel[i])); + if (result) + return result; + } + + /* Only enable level 0 for now.*/ + smu_data->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; + + /* + * in order to prevent MC activity from stutter mode to push DPM up. + * the UVD change complements this by putting the MCLK in a higher state + * by default such that we are not effected by up threshold or and MCLK DPM latency. 
+ */ + smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); + + smu_data->smc_state_table.MemoryDpmLevelCount = (uint8_t)dpm_table->mclk_table.count; + data->dpm_level_enable_mask.mclk_dpm_enable_mask = phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + /* set highest level watermark to high*/ + smu_data->smc_state_table.MemoryLevel[dpm_table->mclk_table.count-1].DisplayWatermark = PPSMC_DISPLAY_WATERMARK_HIGH; + + /* level count will send to smc once at init smc table and never change*/ + result = smu7_copy_bytes_to_smc(hwmgr, + level_array_address, (uint8_t *)levels, (uint32_t)level_array_size, + SMC_RAM_END); + + return result; +} + +static int tonga_populate_mvdd_value(struct pp_hwmgr *hwmgr, + uint32_t mclk, SMIO_Pattern *smio_pattern) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i = 0; + + if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { + /* find mvdd value which clock is more than request */ + for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { + if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { + /* Always round to higher voltage. */ + smio_pattern->Voltage = + data->mvdd_voltage_table.entries[i].value; + break; + } + } + + PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, + "MVDD Voltage is outside the supported range.", + return -EINVAL); + } else { + return -EINVAL; + } + + return 0; +} + + +static int tonga_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + int result = 0; + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct pp_atomctrl_clock_dividers_vi dividers; + + SMIO_Pattern voltage_level; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_2 = data->clock_registers.vCG_SPLL_FUNC_CNTL_2; + uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; + uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; + + /* The ACPI state should not do DPM on DC (or ever).*/ + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + table->ACPILevel.MinVoltage = + smu_data->smc_state_table.GraphicsLevel[0].MinVoltage; + + /* assign zero for now*/ + table->ACPILevel.SclkFrequency = atomctrl_get_reference_clock(hwmgr); + + /* get the engine clock dividers for this clock value*/ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, + table->ACPILevel.SclkFrequency, &dividers); + + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", + return result); + + /* divider ID for required SCLK*/ + table->ACPILevel.SclkDid = (uint8_t)dividers.pll_post_divider; + table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + table->ACPILevel.DeepSleepDivId = 0; + + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, + SPLL_PWRON, 0); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, CG_SPLL_FUNC_CNTL, + SPLL_RESET, 1); + spll_func_cntl_2 = PHM_SET_FIELD(spll_func_cntl_2, CG_SPLL_FUNC_CNTL_2, + SCLK_MUX_SEL, 4); + + table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; + table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; + table->ACPILevel.CgSpllFuncCntl3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + table->ACPILevel.CgSpllFuncCntl4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; +
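(Not part of the patch: an illustrative aside.) The ACPI level is built by forcing the SPLL off in a cached copy of the clock registers: SPLL_PWRON is cleared, SPLL_RESET is set, and SCLK_MUX_SEL is switched, all through PHM_SET_FIELD(). That macro is defined elsewhere in the powerplay headers; it is assumed here to be the usual mask-and-shift read-modify-write, roughly:

#include <stdint.h>

/* Illustrative sketch only; helper name and signature are invented here. */
static uint32_t set_field_sketch(uint32_t reg_val, uint32_t field_mask,
                                 uint32_t field_shift, uint32_t field_val)
{
        return (reg_val & ~field_mask) | ((field_val << field_shift) & field_mask);
}

Nothing is written to hardware at this point; the modified values are only stored in the SMU72_Discrete_DpmTable that is later uploaded to the SMC.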
table->ACPILevel.SpllSpreadSpectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + table->ACPILevel.SpllSpreadSpectrum2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + + /* For various features to be enabled/disabled while this level is active.*/ + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); + /* SCLK frequency in units of 10KHz*/ + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); + + /* table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases;*/ + table->MemoryACPILevel.MinVoltage = + smu_data->smc_state_table.MemoryLevel[0].MinVoltage; + + /* CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage);*/ + + if (0 == tonga_populate_mvdd_value(hwmgr, 0, &voltage_level)) + table->MemoryACPILevel.MinMvdd = + PP_HOST_TO_SMC_UL(voltage_level.Voltage * VOLTAGE_SCALE); + else + table->MemoryACPILevel.MinMvdd = 0; + + /* Force reset on DLL*/ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_RESET, 0x1); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_RESET, 0x1); + + /* Disable DLL in ACPIState*/ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_PDNB, 0); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_PDNB, 0); + + /* Enable DLL bypass signal*/ + dll_cntl = PHM_SET_FIELD(dll_cntl, + DLL_CNTL, MRDCK0_BYPASS, 0); + dll_cntl = PHM_SET_FIELD(dll_cntl, + DLL_CNTL, MRDCK1_BYPASS, 0); + + table->MemoryACPILevel.DllCntl = + PP_HOST_TO_SMC_UL(dll_cntl); + table->MemoryACPILevel.MclkPwrmgtCntl = + PP_HOST_TO_SMC_UL(mclk_pwrmgt_cntl); + table->MemoryACPILevel.MpllAdFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_AD_FUNC_CNTL); + table->MemoryACPILevel.MpllDqFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_DQ_FUNC_CNTL); + table->MemoryACPILevel.MpllFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL); + table->MemoryACPILevel.MpllFuncCntl_1 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_1); + table->MemoryACPILevel.MpllFuncCntl_2 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_2); + table->MemoryACPILevel.MpllSs1 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS1); + table->MemoryACPILevel.MpllSs2 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS2); + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpHyst = 0; + table->MemoryACPILevel.DownHyst = 100; + table->MemoryACPILevel.VoltageDownHyst = 0; + /* Indicates maximum activity level for this performance level.*/ + table->MemoryACPILevel.ActivityLevel = + PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + + table->MemoryACPILevel.StutterEnable = 0; + table->MemoryACPILevel.StrobeEnable = 0; + table->MemoryACPILevel.EdcReadEnable = 0; + table->MemoryACPILevel.EdcWriteEnable = 0; + table->MemoryACPILevel.RttEnable = 0; + + return result; +} + +static int 
tonga_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + int result = 0; + + uint8_t count; + pp_atomctrl_clock_dividers_vi dividers; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + pptable_info->mm_dep_table; + + table->UvdLevelCount = (uint8_t) (mm_table->count); + table->UvdBootLevel = 0; + + for (count = 0; count < table->UvdLevelCount; count++) { + table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk; + table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; + table->UvdLevel[count].MinVoltage.Vddc = + phm_get_voltage_index(pptable_info->vddc_lookup_table, + mm_table->entries[count].vddc); + table->UvdLevel[count].MinVoltage.VddGfx = + (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ? + phm_get_voltage_index(pptable_info->vddgfx_lookup_table, + mm_table->entries[count].vddgfx) : 0; + table->UvdLevel[count].MinVoltage.Vddci = + phm_get_voltage_id(&data->vddci_voltage_table, + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + table->UvdLevel[count].MinVoltage.Phases = 1; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi( + hwmgr, + table->UvdLevel[count].VclkFrequency, + &dividers); + + PP_ASSERT_WITH_CODE((!result), + "can not find divide id for Vclk clock", + return result); + + table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider; + + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->UvdLevel[count].DclkFrequency, &dividers); + PP_ASSERT_WITH_CODE((!result), + "can not find divide id for Dclk clock", + return result); + + table->UvdLevel[count].DclkDivider = + (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); + } + + return result; + +} + +static int tonga_populate_smc_vce_level(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + int result = 0; + + uint8_t count; + pp_atomctrl_clock_dividers_vi dividers; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + pptable_info->mm_dep_table; + + table->VceLevelCount = (uint8_t) (mm_table->count); + table->VceBootLevel = 0; + + for (count = 0; count < table->VceLevelCount; count++) { + table->VceLevel[count].Frequency = + mm_table->entries[count].eclk; + table->VceLevel[count].MinVoltage.Vddc = + phm_get_voltage_index(pptable_info->vddc_lookup_table, + mm_table->entries[count].vddc); + table->VceLevel[count].MinVoltage.VddGfx = + (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ?
+ phm_get_voltage_index(pptable_info->vddgfx_lookup_table, + mm_table->entries[count].vddgfx) : 0; + table->VceLevel[count].MinVoltage.Vddci = + phm_get_voltage_id(&data->vddci_voltage_table, + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + table->VceLevel[count].MinVoltage.Phases = 1; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->VceLevel[count].Frequency, &dividers); + PP_ASSERT_WITH_CODE((!result), + "can not find divide id for VCE engine clock", + return result); + + table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency); + } + + return result; +} + +static int tonga_populate_smc_acp_level(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + int result = 0; + uint8_t count; + pp_atomctrl_clock_dividers_vi dividers; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + pptable_info->mm_dep_table; + + table->AcpLevelCount = (uint8_t) (mm_table->count); + table->AcpBootLevel = 0; + + for (count = 0; count < table->AcpLevelCount; count++) { + table->AcpLevel[count].Frequency = + pptable_info->mm_dep_table->entries[count].aclk; + table->AcpLevel[count].MinVoltage.Vddc = + phm_get_voltage_index(pptable_info->vddc_lookup_table, + mm_table->entries[count].vddc); + table->AcpLevel[count].MinVoltage.VddGfx = + (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ? + phm_get_voltage_index(pptable_info->vddgfx_lookup_table, + mm_table->entries[count].vddgfx) : 0; + table->AcpLevel[count].MinVoltage.Vddci = + phm_get_voltage_id(&data->vddci_voltage_table, + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + table->AcpLevel[count].MinVoltage.Phases = 1; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->AcpLevel[count].Frequency, &dividers); + PP_ASSERT_WITH_CODE((!result), + "can not find divide id for engine clock", return result); + + table->AcpLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->AcpLevel[count].Frequency); + } + + return result; +} + +static int tonga_populate_smc_samu_level(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + int result = 0; + uint8_t count; + pp_atomctrl_clock_dividers_vi dividers; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *pptable_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + pptable_info->mm_dep_table; + + table->SamuBootLevel = 0; + table->SamuLevelCount = (uint8_t) (mm_table->count); + + for (count = 0; count < table->SamuLevelCount; count++) { + /* not sure whether we need evclk or not */ + table->SamuLevel[count].Frequency = + pptable_info->mm_dep_table->entries[count].samclock; + table->SamuLevel[count].MinVoltage.Vddc = + phm_get_voltage_index(pptable_info->vddc_lookup_table, + mm_table->entries[count].vddc); + table->SamuLevel[count].MinVoltage.VddGfx = + (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ?
+ phm_get_voltage_index(pptable_info->vddgfx_lookup_table, + mm_table->entries[count].vddgfx) : 0; + table->SamuLevel[count].MinVoltage.Vddci = + phm_get_voltage_id(&data->vddci_voltage_table, + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + table->SamuLevel[count].MinVoltage.Phases = 1; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->SamuLevel[count].Frequency, &dividers); + PP_ASSERT_WITH_CODE((!result), + "can not find divide id for samu clock", return result); + + table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); + } + + return result; +} + +static int tonga_populate_memory_timing_parameters( + struct pp_hwmgr *hwmgr, + uint32_t engine_clock, + uint32_t memory_clock, + struct SMU72_Discrete_MCArbDramTimingTableEntry *arb_regs + ) +{ + uint32_t dramTiming; + uint32_t dramTiming2; + uint32_t burstTime; + int result; + + result = atomctrl_set_engine_dram_timings_rv770(hwmgr, + engine_clock, memory_clock); + + PP_ASSERT_WITH_CODE(result == 0, + "Error calling VBIOS to set DRAM_TIMING.", return result); + + dramTiming = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); + dramTiming2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); + burstTime = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); + + arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dramTiming); + arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dramTiming2); + arb_regs->McArbBurstTime = (uint8_t)burstTime; + + return 0; +} + +static int tonga_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + int result = 0; + SMU72_Discrete_MCArbDramTimingTable arb_regs; + uint32_t i, j; + + memset(&arb_regs, 0x00, sizeof(SMU72_Discrete_MCArbDramTimingTable)); + + for (i = 0; i < data->dpm_table.sclk_table.count; i++) { + for (j = 0; j < data->dpm_table.mclk_table.count; j++) { + result = tonga_populate_memory_timing_parameters + (hwmgr, data->dpm_table.sclk_table.dpm_levels[i].value, + data->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + + if (result) + break; + } + } + + if (!result) { + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.arb_table_start, + (uint8_t *)&arb_regs, + sizeof(SMU72_Discrete_MCArbDramTimingTable), + SMC_RAM_END + ); + } + + return result; +} + +static int tonga_populate_smc_boot_level(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + int result = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + /* find boot level from dpm table*/ + result = phm_find_boot_level(&(data->dpm_table.sclk_table), + data->vbios_boot_state.sclk_bootup_value, + (uint32_t *)&(smu_data->smc_state_table.GraphicsBootLevel)); + + if (result != 0) { + smu_data->smc_state_table.GraphicsBootLevel = 0; + pr_err("[powerplay] VBIOS did not find boot engine " + "clock value in dependency table.
" + "Using Graphics DPM level 0 !"); + result = 0; + } + + result = phm_find_boot_level(&(data->dpm_table.mclk_table), + data->vbios_boot_state.mclk_bootup_value, + (uint32_t *)&(smu_data->smc_state_table.MemoryBootLevel)); + + if (result != 0) { + smu_data->smc_state_table.MemoryBootLevel = 0; + pr_err("[powerplay] VBIOS did not find boot " + "engine clock value in dependency table." + "Using Memory DPM level 0 !"); + result = 0; + } + + table->BootVoltage.Vddc = + phm_get_voltage_id(&(data->vddc_voltage_table), + data->vbios_boot_state.vddc_bootup_value); + table->BootVoltage.VddGfx = + phm_get_voltage_id(&(data->vddgfx_voltage_table), + data->vbios_boot_state.vddgfx_bootup_value); + table->BootVoltage.Vddci = + phm_get_voltage_id(&(data->vddci_voltage_table), + data->vbios_boot_state.vddci_bootup_value); + table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value; + + CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); + + return result; +} + +static int tonga_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) +{ + uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks, + volt_with_cks, value; + uint16_t clock_freq_u16; + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2, + volt_offset = 0; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + uint32_t hw_revision, dev_id; + struct cgs_system_info sys_info = {0}; + + stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; + + sys_info.size = sizeof(struct cgs_system_info); + + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_REV; + cgs_query_system_info(hwmgr->device, &sys_info); + hw_revision = (uint32_t)sys_info.value; + + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_DEV; + cgs_query_system_info(hwmgr->device, &sys_info); + dev_id = (uint32_t)sys_info.value; + + /* Read SMU_Eefuse to read and calculate RO and determine + * if the part is SS or FF. if RO >= 1660MHz, part is FF. 
+ */ + efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_EFUSE_0 + (146 * 4)); + efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_EFUSE_0 + (148 * 4)); + efuse &= 0xFF000000; + efuse = efuse >> 24; + efuse2 &= 0xF; + + if (efuse2 == 1) + ro = (2300 - 1350) * efuse / 255 + 1350; + else + ro = (2500 - 1000) * efuse / 255 + 1000; + + if (ro >= 1660) + type = 0; + else + type = 1; + + /* Populate Stretch amount */ + smu_data->smc_state_table.ClockStretcherAmount = stretch_amount; + + + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ + for (i = 0; i < sclk_table->count; i++) { + smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= + sclk_table->entries[i].cks_enable << i; + if (ASICID_IS_TONGA_P(dev_id, hw_revision)) { + volt_without_cks = (uint32_t)((7732 + 60 - ro - 20838 * + (sclk_table->entries[i].clk/100) / 10000) * 1000 / + (8730 - (5301 * (sclk_table->entries[i].clk/100) / 1000))); + volt_with_cks = (uint32_t)((5250 + 51 - ro - 2404 * + (sclk_table->entries[i].clk/100) / 100000) * 1000 / + (6146 - (3193 * (sclk_table->entries[i].clk/100) / 1000))); + } else { + volt_without_cks = (uint32_t)((14041 * + (sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 / + (4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000))); + volt_with_cks = (uint32_t)((13946 * + (sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 / + (3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000))); + } + if (volt_without_cks >= volt_with_cks) + volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); + smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; + } + + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + STRETCH_ENABLE, 0x0); + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + masterReset, 0x1); + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + staticEnable, 0x1); + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, + masterReset, 0x0); + + /* Populate CKS Lookup Table */ + if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) + stretch_amount2 = 0; + else if (stretch_amount == 3 || stretch_amount == 4) + stretch_amount2 = 1; + else { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher); + PP_ASSERT_WITH_CODE(false, + "Stretch Amount in PPTable not supported\n", + return -EINVAL); + } + + value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixPWR_CKS_CNTL); + value &= 0xFFC2FF87; + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq = + tonga_clock_stretcher_lookup_table[stretch_amount2][0]; + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq = + tonga_clock_stretcher_lookup_table[stretch_amount2][1]; + clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(smu_data->smc_state_table. + GraphicsLevel[smu_data->smc_state_table.GraphicsDpmLevelCount - 1]. + SclkFrequency) / 100); + if (tonga_clock_stretcher_lookup_table[stretch_amount2][0] < + clock_freq_u16 && + tonga_clock_stretcher_lookup_table[stretch_amount2][1] > + clock_freq_u16) { + /* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */ + value |= (tonga_clock_stretcher_lookup_table[stretch_amount2][3]) << 16; + /* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */ + value |= (tonga_clock_stretcher_lookup_table[stretch_amount2][2]) << 18; + /* Program PWR_CKS_CNTL. 
CKS_STRETCH_AMOUNT */ + value |= (tonga_clock_stretch_amount_conversion + [tonga_clock_stretcher_lookup_table[stretch_amount2][3]] + [stretch_amount]) << 3; + } + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. + CKS_LOOKUPTableEntry[0].minFreq); + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.CKS_LOOKUPTable. + CKS_LOOKUPTableEntry[0].maxFreq); + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting = + tonga_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F; + smu_data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |= + (tonga_clock_stretcher_lookup_table[stretch_amount2][3]) << 7; + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixPWR_CKS_CNTL, value); + + /* Populate DDT Lookup Table */ + for (i = 0; i < 4; i++) { + /* Assign the minimum and maximum VID stored + * in the last row of Clock Stretcher Voltage Table. + */ + smu_data->smc_state_table.ClockStretcherDataTable. + ClockStretcherDataTableEntry[i].minVID = + (uint8_t) tonga_clock_stretcher_ddt_table[type][i][2]; + smu_data->smc_state_table.ClockStretcherDataTable. + ClockStretcherDataTableEntry[i].maxVID = + (uint8_t) tonga_clock_stretcher_ddt_table[type][i][3]; + /* Loop through each SCLK and check the frequency + * to see if it lies within the frequency for clock stretcher. + */ + for (j = 0; j < smu_data->smc_state_table.GraphicsDpmLevelCount; j++) { + cks_setting = 0; + clock_freq = PP_SMC_TO_HOST_UL( + smu_data->smc_state_table.GraphicsLevel[j].SclkFrequency); + /* Check the allowed frequency against the sclk level[j]. + * Sclk's endianness has already been converted, + * and it's in 10Khz unit, + * as opposed to Data table, which is in Mhz unit. + */ + if (clock_freq >= tonga_clock_stretcher_ddt_table[type][i][0] * 100) { + cks_setting |= 0x2; + if (clock_freq < tonga_clock_stretcher_ddt_table[type][i][1] * 100) + cks_setting |= 0x1; + } + smu_data->smc_state_table.ClockStretcherDataTable. + ClockStretcherDataTableEntry[i].setting |= cks_setting << (j * 2); + } + CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table. + ClockStretcherDataTable. 
+ ClockStretcherDataTableEntry[i].setting); + } + + value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixPWR_CKS_CNTL); + value &= 0xFFFFFFFE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixPWR_CKS_CNTL, value); + + return 0; +} + +static int tonga_populate_vr_config(struct pp_hwmgr *hwmgr, + SMU72_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint16_t config; + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vdd_gfx_control) { + /* Splitted mode */ + config = VR_SVI2_PLANE_1; + table->VRConfig |= (config<<VRCONF_VDDGFX_SHIFT); + + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + config = VR_SVI2_PLANE_2; + table->VRConfig |= config; + } else { + pr_err("VDDC and VDDGFX should " + "be both on SVI2 control in splitted mode !\n"); + } + } else { + /* Merged mode */ + config = VR_MERGED_WITH_VDDC; + table->VRConfig |= (config<<VRCONF_VDDGFX_SHIFT); + + /* Set Vddc Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + config = VR_SVI2_PLANE_1; + table->VRConfig |= config; + } else { + pr_err("VDDC should be on " + "SVI2 control in merged mode !\n"); + } + } + + /* Set Vddci Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { + config = VR_SVI2_PLANE_2; /* only in merged mode */ + table->VRConfig |= (config<<VRCONF_VDDCI_SHIFT); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + config = VR_SMIO_PATTERN_1; + table->VRConfig |= (config<<VRCONF_VDDCI_SHIFT); + } + + /* Set Mvdd Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + config = VR_SMIO_PATTERN_2; + table->VRConfig |= (config<<VRCONF_MVDD_SHIFT); + } + + return 0; +} + +static int tonga_init_arb_table_index(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + uint32_t tmp; + int result; + + /* + * This is a read-modify-write on the first byte of the ARB table. + * The first byte in the SMU72_Discrete_MCArbDramTimingTable structure + * is the field 'current'. + * This solution is ugly, but we never write the whole table only + * individual fields in it. + * In reality this field should not be in that structure + * but in a soft register. 
+ */ + result = smu7_read_smc_sram_dword(hwmgr, + smu_data->smu7_data.arb_table_start, &tmp, SMC_RAM_END); + + if (result != 0) + return result; + + tmp &= 0x00FFFFFF; + tmp |= ((uint32_t)MC_CG_ARB_FREQ_F1) << 24; + + return smu7_write_smc_sram_dword(hwmgr, + smu_data->smu7_data.arb_table_start, tmp, SMC_RAM_END); +} + + +static int tonga_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; + SMU72_Discrete_DpmTable *dpm_table = &(smu_data->smc_state_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; + int i, j, k; + const uint16_t *pdef1, *pdef2; + + dpm_table->DefaultTdp = PP_HOST_TO_SMC_US( + (uint16_t)(cac_dtp_table->usTDP * 256)); + dpm_table->TargetTdp = PP_HOST_TO_SMC_US( + (uint16_t)(cac_dtp_table->usConfigurableTDP * 256)); + + PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, + "Target Operating Temp is out of Range !", + ); + + dpm_table->GpuTjMax = (uint8_t)(cac_dtp_table->usTargetOperatingTemp); + dpm_table->GpuTjHyst = 8; + + dpm_table->DTEAmbientTempBase = defaults->dte_ambient_temp_base; + + dpm_table->BAPM_TEMP_GRADIENT = + PP_HOST_TO_SMC_UL(defaults->bapm_temp_gradient); + pdef1 = defaults->bapmti_r; + pdef2 = defaults->bapmti_rc; + + for (i = 0; i < SMU72_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU72_DTE_SOURCES; j++) { + for (k = 0; k < SMU72_DTE_SINKS; k++) { + dpm_table->BAPMTI_R[i][j][k] = + PP_HOST_TO_SMC_US(*pdef1); + dpm_table->BAPMTI_RC[i][j][k] = + PP_HOST_TO_SMC_US(*pdef2); + pdef1++; + pdef2++; + } + } + } + + return 0; +} + +static int tonga_populate_svi_load_line(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; + + smu_data->power_tune_table.SviLoadLineEn = defaults->svi_load_line_en; + smu_data->power_tune_table.SviLoadLineVddC = defaults->svi_load_line_vddC; + smu_data->power_tune_table.SviLoadLineTrimVddC = 3; + smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int tonga_populate_tdc_limit(struct pp_hwmgr *hwmgr) +{ + uint16_t tdc_limit; + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + /* TDC number of fraction bits are changed from 8 to 7 + * for Fiji as requested by SMC team + */ + tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 256); + smu_data->power_tune_table.TDC_VDDC_PkgLimit = + CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); + smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + defaults->tdc_vddc_throttle_release_limit_perc; + smu_data->power_tune_table.TDC_MAWt = defaults->tdc_mawt; + + return 0; +} + +static int tonga_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + const struct tonga_pt_defaults *defaults = smu_data->power_tune_defaults; + uint32_t temp; + + if (smu7_read_smc_sram_dword(hwmgr, + fuse_table_offset + + offsetof(SMU72_Discrete_PmFuses, TdcWaterfallCtl), + (uint32_t *)&temp, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to read 
PmFuses.DW6 " + "(SviLoadLineEn) from SMC Failed !", + return -EINVAL); + else + smu_data->power_tune_table.TdcWaterfallCtl = defaults->tdc_waterfall_ctl; + + return 0; +} + +static int tonga_populate_temperature_scaler(struct pp_hwmgr *hwmgr) +{ + int i; + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; + + return 0; +} + +static int tonga_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + + if ((hwmgr->thermal_controller.advanceFanControlParameters. + usFanOutputSensitivity & (1 << 15)) || + (hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity == 0)) + hwmgr->thermal_controller.advanceFanControlParameters. + usFanOutputSensitivity = hwmgr->thermal_controller. + advanceFanControlParameters.usDefaultFanOutputSensitivity; + + smu_data->power_tune_table.FuzzyFan_PwmSetDelta = + PP_HOST_TO_SMC_US(hwmgr->thermal_controller. + advanceFanControlParameters.usFanOutputSensitivity); + return 0; +} + +static int tonga_populate_gnb_lpml(struct pp_hwmgr *hwmgr) +{ + int i; + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.GnbLPML[i] = 0; + + return 0; +} + +static int tonga_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; + uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; + struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; + + hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); + lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); + + smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = + CONVERT_FROM_HOST_TO_SMC_US(hi_sidd); + smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = + CONVERT_FROM_HOST_TO_SMC_US(lo_sidd); + + return 0; +} + +static int tonga_populate_pm_fuses(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + uint32_t pm_fuse_table_offset; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + if (smu7_read_smc_sram_dword(hwmgr, + SMU72_FIRMWARE_HEADER_LOCATION + + offsetof(SMU72_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to get pm_fuse_table_offset Failed !", + return -EINVAL); + + /* DW6 */ + if (tonga_populate_svi_load_line(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate SviLoadLine Failed !", + return -EINVAL); + /* DW7 */ + if (tonga_populate_tdc_limit(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TDCLimit Failed !", + return -EINVAL); + /* DW8 */ + if (tonga_populate_dw8(hwmgr, pm_fuse_table_offset)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TdcWaterfallCtl Failed !", + return -EINVAL); + + /* DW9-DW12 */ + if (tonga_populate_temperature_scaler(hwmgr) != 0) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate LPMLTemperatureScaler Failed !", + return -EINVAL); + + /* DW13-DW14 */ + if (tonga_populate_fuzzy_fan(hwmgr)) + PP_ASSERT_WITH_CODE(false, + 
"Attempt to populate Fuzzy Fan " + "Control parameters Failed !", + return -EINVAL); + + /* DW15-DW18 */ + if (tonga_populate_gnb_lpml(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Failed !", + return -EINVAL); + + /* DW20 */ + if (tonga_populate_bapm_vddc_base_leakage_sidd(hwmgr)) + PP_ASSERT_WITH_CODE( + false, + "Attempt to populate BapmVddCBaseLeakage " + "Hi and Lo Sidd Failed !", + return -EINVAL); + + if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, + (uint8_t *)&smu_data->power_tune_table, + sizeof(struct SMU72_Discrete_PmFuses), SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to download PmFuseTable Failed !", + return -EINVAL); + } + return 0; +} + +static int tonga_populate_mc_reg_address(struct pp_hwmgr *hwmgr, + SMU72_Discrete_MCRegisters *mc_reg_table) +{ + const struct tonga_smumgr *smu_data = (struct tonga_smumgr *)hwmgr->smu_backend; + + uint32_t i, j; + + for (i = 0, j = 0; j < smu_data->mc_reg_table.last; j++) { + if (smu_data->mc_reg_table.validflag & 1<<j) { + PP_ASSERT_WITH_CODE( + i < SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE, + "Index of mc_reg_table->address[] array " + "out of boundary", + return -EINVAL); + mc_reg_table->address[i].s0 = + PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s0); + mc_reg_table->address[i].s1 = + PP_HOST_TO_SMC_US(smu_data->mc_reg_table.mc_reg_address[j].s1); + i++; + } + } + + mc_reg_table->last = (uint8_t)i; + + return 0; +} + +/*convert register values from driver to SMC format */ +static void tonga_convert_mc_registers( + const struct tonga_mc_reg_entry *entry, + SMU72_Discrete_MCRegisterSet *data, + uint32_t num_entries, uint32_t valid_flag) +{ + uint32_t i, j; + + for (i = 0, j = 0; j < num_entries; j++) { + if (valid_flag & 1<<j) { + data->value[i] = PP_HOST_TO_SMC_UL(entry->mc_data[j]); + i++; + } + } +} + +static int tonga_convert_mc_reg_table_entry_to_smc( + struct pp_hwmgr *hwmgr, + const uint32_t memory_clock, + SMU72_Discrete_MCRegisterSet *mc_reg_table_data + ) +{ + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + uint32_t i = 0; + + for (i = 0; i < smu_data->mc_reg_table.num_entries; i++) { + if (memory_clock <= + smu_data->mc_reg_table.mc_reg_table_entry[i].mclk_max) { + break; + } + } + + if ((i == smu_data->mc_reg_table.num_entries) && (i > 0)) + --i; + + tonga_convert_mc_registers(&smu_data->mc_reg_table.mc_reg_table_entry[i], + mc_reg_table_data, smu_data->mc_reg_table.last, + smu_data->mc_reg_table.validflag); + + return 0; +} + +static int tonga_convert_mc_reg_table_to_smc(struct pp_hwmgr *hwmgr, + SMU72_Discrete_MCRegisters *mc_regs) +{ + int result = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + int res; + uint32_t i; + + for (i = 0; i < data->dpm_table.mclk_table.count; i++) { + res = tonga_convert_mc_reg_table_entry_to_smc( + hwmgr, + data->dpm_table.mclk_table.dpm_levels[i].value, + &mc_regs->data[i] + ); + + if (0 != res) + result = res; + } + + return result; +} + +static int tonga_update_and_upload_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t address; + int32_t result; + + if (0 == (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) + return 0; + + + memset(&smu_data->mc_regs, 0, sizeof(SMU72_Discrete_MCRegisters)); + + result = tonga_convert_mc_reg_table_to_smc(hwmgr, &(smu_data->mc_regs)); + + if (result != 0) + return result; + + + address = 
smu_data->smu7_data.mc_reg_table_start + + (uint32_t)offsetof(SMU72_Discrete_MCRegisters, data[0]); + + return smu7_copy_bytes_to_smc( + hwmgr, address, + (uint8_t *)&smu_data->mc_regs.data[0], + sizeof(SMU72_Discrete_MCRegisterSet) * + data->dpm_table.mclk_table.count, + SMC_RAM_END); +} + +static int tonga_populate_initial_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + + memset(&smu_data->mc_regs, 0x00, sizeof(SMU72_Discrete_MCRegisters)); + result = tonga_populate_mc_reg_address(hwmgr, &(smu_data->mc_regs)); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize MCRegTable for the MC register addresses !", + return result;); + + result = tonga_convert_mc_reg_table_to_smc(hwmgr, &smu_data->mc_regs); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize MCRegTable for driver state !", + return result;); + + return smu7_copy_bytes_to_smc(hwmgr, smu_data->smu7_data.mc_reg_table_start, + (uint8_t *)&smu_data->mc_regs, sizeof(SMU72_Discrete_MCRegisters), SMC_RAM_END); +} + +static void tonga_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (table_info && + table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX && + table_info->cac_dtp_table->usPowerTuneDataSetID) + smu_data->power_tune_defaults = + &tonga_power_tune_data_set_array + [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; + else + smu_data->power_tune_defaults = &tonga_power_tune_data_set_array[0]; +} + +static void tonga_save_default_power_profile(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *data = (struct tonga_smumgr *)(hwmgr->smu_backend); + struct SMU72_Discrete_GraphicsLevel *levels = + data->smc_state_table.GraphicsLevel; + unsigned min_level = 1; + + hwmgr->default_gfx_power_profile.activity_threshold = + be16_to_cpu(levels[0].ActivityLevel); + hwmgr->default_gfx_power_profile.up_hyst = levels[0].UpHyst; + hwmgr->default_gfx_power_profile.down_hyst = levels[0].DownHyst; + hwmgr->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; + + hwmgr->default_compute_power_profile = hwmgr->default_gfx_power_profile; + hwmgr->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; + + /* Workaround compute SDMA instability: disable lowest SCLK + * DPM level. 
Optimize compute power profile: Use only highest + * 2 power levels (if more than 2 are available), Hysteresis: + * 0ms up, 5ms down + */ + if (data->smc_state_table.GraphicsDpmLevelCount > 2) + min_level = data->smc_state_table.GraphicsDpmLevelCount - 2; + else if (data->smc_state_table.GraphicsDpmLevelCount == 2) + min_level = 1; + else + min_level = 0; + hwmgr->default_compute_power_profile.min_sclk = + be32_to_cpu(levels[min_level].SclkFrequency); + hwmgr->default_compute_power_profile.up_hyst = 0; + hwmgr->default_compute_power_profile.down_hyst = 5; + + hwmgr->gfx_power_profile = hwmgr->default_gfx_power_profile; + hwmgr->compute_power_profile = hwmgr->default_compute_power_profile; +} + +static int tonga_init_smc_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + SMU72_Discrete_DpmTable *table = &(smu_data->smc_state_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + uint8_t i; + pp_atomctrl_gpio_pin_assignment gpio_pin_assignment; + + + memset(&(smu_data->smc_state_table), 0x00, sizeof(smu_data->smc_state_table)); + + tonga_initialize_power_tune_defaults(hwmgr); + + if (SMU7_VOLTAGE_CONTROL_NONE != data->voltage_control) + tonga_populate_smc_voltage_tables(hwmgr, table); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StepVddc)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (data->is_memory_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + i = PHM_READ_FIELD(hwmgr->device, CC_MC_MAX_CHANNEL, NOOFCHAN); + + if (i == 1 || i == 0) + table->SystemFlags |= 0x40; + + if (data->ulv_supported && table_info->us_ulv_voltage_offset) { + result = tonga_populate_ulv_state(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize ULV state !", + return result;); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_ULV_PARAMETER, 0x40035); + } + + result = tonga_populate_smc_link_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Link Level !", return result); + + result = tonga_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Graphics Level !", return result); + + result = tonga_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Memory Level !", return result); + + result = tonga_populate_smc_acpi_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize ACPI Level !", return result); + + result = tonga_populate_smc_vce_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize VCE Level !", return result); + + result = tonga_populate_smc_acp_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize ACP Level !", return result); + + result = tonga_populate_smc_samu_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize SAMU Level !", return result); + + /* Since only the initial state is completely set up at this + * point (the other states are just copies of the boot state) we only + * need to populate the ARB settings for the initial state. 
+ */ + result = tonga_program_memory_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to Write ARB settings for the initial state.", + return result;); + + result = tonga_populate_smc_uvd_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize UVD Level !", return result); + + result = tonga_populate_smc_boot_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Boot Level !", return result); + + tonga_populate_bapm_parameters_in_dpm_table(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate BAPM Parameters !", return result); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher)) { + result = tonga_populate_clock_stretcher_data_table(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate Clock Stretcher Data Table !", + return result;); + } + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = + table_info->cac_dtp_table->usTargetOperatingTemp * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->TemperatureLimitLow = + (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + + /* + * Cail reads current link status and reports it as cap (we cannot + * change this due to some previous issues we had) + * SMC drops the link status to lowest level after enabling + * DPM by PowerPlay. After pnp or toggling CF, driver gets reloaded again + * but this time Cail reads current link status which was set to low by + * SMC and reports it as cap to powerplay + * To avoid it, we set PCIeBootLinkLevel to highest dpm level + */ + PP_ASSERT_WITH_CODE((1 <= data->dpm_table.pcie_speed_table.count), + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); + + table->PCIeGenInterval = 1; + + result = tonga_populate_vr_config(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate VRConfig setting !", return result); + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID, + &gpio_pin_assignment)) { + table->VRHotGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } else { + table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } + + if (atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID, + &gpio_pin_assignment)) { + table->AcDcGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } else { + table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } + + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_Falcon_QuickTransition); + + if (0) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_Falcon_QuickTransition); + } + + if (atomctrl_get_pp_assign_pin(hwmgr, + 
THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin_assignment)) { + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalOutGPIO); + + table->ThermOutGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; + + table->ThermOutPolarity = + (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) & + (1 << gpio_pin_assignment.uc_gpio_pin_bit_shift))) ? 1 : 0; + + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY; + + /* if required, combine VRHot/PCC with thermal out GPIO*/ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot) && + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_CombinePCCWithThermalSignal)){ + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT; + } + } else { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalOutGPIO); + + table->ThermOutGpio = 17; + table->ThermOutPolarity = 1; + table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE; + } + + for (i = 0; i < SMU72_MAX_ENTRIES_SMIO; i++) + table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); + + CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); + CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2); + CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); + CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); + CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); + + /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.dpm_table_start + offsetof(SMU72_Discrete_DpmTable, SystemFlags), + (uint8_t *)&(table->SystemFlags), + sizeof(SMU72_Discrete_DpmTable) - 3 * sizeof(SMU72_PIDController), + SMC_RAM_END); + + PP_ASSERT_WITH_CODE(!result, + "Failed to upload dpm data to SMC memory !", return result;); + + result = tonga_init_arb_table_index(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to upload arb data to SMC memory !", return result); + + tonga_populate_pm_fuses(hwmgr); + PP_ASSERT_WITH_CODE((!result), + "Failed to populate initialize pm fuses !", return result); + + result = tonga_populate_initial_mc_reg_table(hwmgr); + PP_ASSERT_WITH_CODE((!result), + "Failed to populate initialize MC Reg table !", return result); + + tonga_save_default_power_profile(hwmgr); + + return 0; +} + +static int tonga_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + SMU72_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; + uint32_t duty100; + uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; + uint16_t fdo_min, slope1, slope2; + uint32_t reference_clock; + int res; + uint64_t tmp64; + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl)) + return 0; + + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + if (0 == smu_data->smu7_data.fan_table_start) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, + CG_FDO_CTRL1, FMAX_DUTY100); + + if (0 == duty100) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; 
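	/*
	 * A hedged reading of the conversions below, with illustrative
	 * numbers only: duty100 is the FDO counter value for 100% duty,
	 * and the advanceFanControlParameters fields appear to be stored
	 * in hundredths (0.01% for the PWM points, 0.01 degC for the
	 * temperatures).  With usPWMMin = 3000 (30.00%) and duty100 = 255,
	 * fdo_min works out to 3000 * 255 / 10000 = 76 counts.  The
	 * "(50 + x) / 100" pattern is a rounded divide that turns 0.01 degC
	 * values into whole degrees for the SMC fan table, and the factor
	 * of 16 in the slopes appears to yield a fixed-point value with
	 * four fractional bits.
	 */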
+ } + + tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin * duty100; + do_div(tmp64, 10000); + fdo_min = (uint16_t)tmp64; + + t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - + hwmgr->thermal_controller.advanceFanControlParameters.usTMin; + t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - + hwmgr->thermal_controller.advanceFanControlParameters.usTMed; + + pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; + pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; + + slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); + slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); + + fan_table.TempMin = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMin) / 100); + fan_table.TempMed = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMed) / 100); + fan_table.TempMax = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMax) / 100); + + fan_table.Slope1 = cpu_to_be16(slope1); + fan_table.Slope2 = cpu_to_be16(slope2); + + fan_table.FdoMin = cpu_to_be16(fdo_min); + + fan_table.HystDown = cpu_to_be16(hwmgr->thermal_controller.advanceFanControlParameters.ucTHyst); + + fan_table.HystUp = cpu_to_be16(1); + + fan_table.HystSlope = cpu_to_be16(1); + + fan_table.TempRespLim = cpu_to_be16(5); + + reference_clock = smu7_get_xclk(hwmgr); + + fan_table.RefreshPeriod = cpu_to_be32((hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay * reference_clock) / 1600); + + fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); + + fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_MULT_THERMAL_CTRL, TEMP_SEL); + + fan_table.FanControl_GL_Flag = 1; + + res = smu7_copy_bytes_to_smc(hwmgr, + smu_data->smu7_data.fan_table_start, + (uint8_t *)&fan_table, + (uint32_t)sizeof(fan_table), + SMC_RAM_END); + + return 0; +} + + +static int tonga_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) + return tonga_program_memory_timing_parameters(hwmgr); + + return 0; +} + +static int tonga_update_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + + int result = 0; + uint32_t low_sclk_interrupt_threshold = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification) + && (hwmgr->gfx_arbiter.sclk_threshold != + data->low_sclk_interrupt_threshold)) { + data->low_sclk_interrupt_threshold = + hwmgr->gfx_arbiter.sclk_threshold; + low_sclk_interrupt_threshold = + data->low_sclk_interrupt_threshold; + + CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, + LowSclkInterruptThreshold), + (uint8_t *)&low_sclk_interrupt_threshold, + sizeof(uint32_t), + SMC_RAM_END); + } + + result = tonga_update_and_upload_mc_reg_table(hwmgr); + + PP_ASSERT_WITH_CODE((!result), + "Failed to upload MC reg table !", + return result); + + result = 
tonga_program_mem_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE((result == 0), + "Failed to program memory timing parameters !", + ); + + return result; +} + +static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member) +{ + switch (type) { + case SMU_SoftRegisters: + switch (member) { + case HandshakeDisables: + return offsetof(SMU72_SoftRegisters, HandshakeDisables); + case VoltageChangeTimeout: + return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout); + case AverageGraphicsActivity: + return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity); + case PreVBlankGap: + return offsetof(SMU72_SoftRegisters, PreVBlankGap); + case VBlankTimeout: + return offsetof(SMU72_SoftRegisters, VBlankTimeout); + case UcodeLoadStatus: + return offsetof(SMU72_SoftRegisters, UcodeLoadStatus); + case DRAM_LOG_ADDR_H: + return offsetof(SMU72_SoftRegisters, DRAM_LOG_ADDR_H); + case DRAM_LOG_ADDR_L: + return offsetof(SMU72_SoftRegisters, DRAM_LOG_ADDR_L); + case DRAM_LOG_PHY_ADDR_H: + return offsetof(SMU72_SoftRegisters, DRAM_LOG_PHY_ADDR_H); + case DRAM_LOG_PHY_ADDR_L: + return offsetof(SMU72_SoftRegisters, DRAM_LOG_PHY_ADDR_L); + case DRAM_LOG_BUFF_SIZE: + return offsetof(SMU72_SoftRegisters, DRAM_LOG_BUFF_SIZE); + } + case SMU_Discrete_DpmTable: + switch (member) { + case UvdBootLevel: + return offsetof(SMU72_Discrete_DpmTable, UvdBootLevel); + case VceBootLevel: + return offsetof(SMU72_Discrete_DpmTable, VceBootLevel); + case SamuBootLevel: + return offsetof(SMU72_Discrete_DpmTable, SamuBootLevel); + case LowSclkInterruptThreshold: + return offsetof(SMU72_Discrete_DpmTable, LowSclkInterruptThreshold); + } + } + pr_warn("can't get the offset of type %x member %x\n", type, member); + return 0; +} + +static uint32_t tonga_get_mac_definition(uint32_t value) +{ + switch (value) { + case SMU_MAX_LEVELS_GRAPHICS: + return SMU72_MAX_LEVELS_GRAPHICS; + case SMU_MAX_LEVELS_MEMORY: + return SMU72_MAX_LEVELS_MEMORY; + case SMU_MAX_LEVELS_LINK: + return SMU72_MAX_LEVELS_LINK; + case SMU_MAX_ENTRIES_SMIO: + return SMU72_MAX_ENTRIES_SMIO; + case SMU_MAX_LEVELS_VDDC: + return SMU72_MAX_LEVELS_VDDC; + case SMU_MAX_LEVELS_VDDGFX: + return SMU72_MAX_LEVELS_VDDGFX; + case SMU_MAX_LEVELS_VDDCI: + return SMU72_MAX_LEVELS_VDDCI; + case SMU_MAX_LEVELS_MVDD: + return SMU72_MAX_LEVELS_MVDD; + } + pr_warn("can't get the mac value %x\n", value); + + return 0; +} + +static int tonga_update_uvd_smc_table(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + smu_data->smc_state_table.UvdBootLevel = 0; + if (table_info->mm_dep_table->count > 0) + smu_data->smc_state_table.UvdBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, UvdBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0x00FFFFFF; + mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + mm_boot_level_offset, mm_boot_level_value); + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UVDDPM) || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + 
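		/*
		 * If UVD DPM is not enabled, or a stable power state is
		 * being forced, restrict the SMC's UVD enabled-level mask
		 * to the boot level selected above.
		 */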
smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_UVDDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); + return 0; +} + +static int tonga_update_vce_smc_table(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = + (struct tonga_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + + smu_data->smc_state_table.VceBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, VceBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFF00FFFF; + mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_VCEDPM_SetEnabledMask, + (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); + return 0; +} + +static int tonga_update_samu_smc_table(struct pp_hwmgr *hwmgr) +{ + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + + smu_data->smc_state_table.SamuBootLevel = 0; + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, SamuBootLevel); + + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFFFFFF00; + mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); + return 0; +} + +static int tonga_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) +{ + switch (type) { + case SMU_UVD_TABLE: + tonga_update_uvd_smc_table(hwmgr); + break; + case SMU_VCE_TABLE: + tonga_update_vce_smc_table(hwmgr); + break; + case SMU_SAMU_TABLE: + tonga_update_samu_smc_table(hwmgr); + break; + default: + break; + } + return 0; +} + +static int tonga_process_firmware_header(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + + uint32_t tmp; + int result; + bool error = false; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU72_FIRMWARE_HEADER_LOCATION + + offsetof(SMU72_Firmware_Header, DpmTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.dpm_table_start = tmp; + + error |= (result != 0); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU72_FIRMWARE_HEADER_LOCATION + + offsetof(SMU72_Firmware_Header, SoftRegisters), + &tmp, SMC_RAM_END); + + if (!result) { + data->soft_regs_start = tmp; + smu_data->smu7_data.soft_regs_start = tmp; + } + + error |= (result != 0); + + + result = smu7_read_smc_sram_dword(hwmgr, + SMU72_FIRMWARE_HEADER_LOCATION + + offsetof(SMU72_Firmware_Header, 
mcRegisterTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.mc_reg_table_start = tmp; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU72_FIRMWARE_HEADER_LOCATION + + offsetof(SMU72_Firmware_Header, FanTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.fan_table_start = tmp; + + error |= (result != 0); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU72_FIRMWARE_HEADER_LOCATION + + offsetof(SMU72_Firmware_Header, mcArbDramTimingTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.arb_table_start = tmp; + + error |= (result != 0); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU72_FIRMWARE_HEADER_LOCATION + + offsetof(SMU72_Firmware_Header, Version), + &tmp, SMC_RAM_END); + + if (!result) + hwmgr->microcode_version_info.SMC = tmp; + + error |= (result != 0); + + return error ? 1 : 0; +} + +/*---------------------------MC----------------------------*/ + +static uint8_t tonga_get_memory_modile_index(struct pp_hwmgr *hwmgr) +{ + return (uint8_t) (0xFF & (cgs_read_register(hwmgr->device, mmBIOS_SCRATCH_4) >> 16)); +} + +static bool tonga_check_s0_mc_reg_index(uint16_t in_reg, uint16_t *out_reg) +{ + bool result = true; + + switch (in_reg) { + case mmMC_SEQ_RAS_TIMING: + *out_reg = mmMC_SEQ_RAS_TIMING_LP; + break; + + case mmMC_SEQ_DLL_STBY: + *out_reg = mmMC_SEQ_DLL_STBY_LP; + break; + + case mmMC_SEQ_G5PDX_CMD0: + *out_reg = mmMC_SEQ_G5PDX_CMD0_LP; + break; + + case mmMC_SEQ_G5PDX_CMD1: + *out_reg = mmMC_SEQ_G5PDX_CMD1_LP; + break; + + case mmMC_SEQ_G5PDX_CTRL: + *out_reg = mmMC_SEQ_G5PDX_CTRL_LP; + break; + + case mmMC_SEQ_CAS_TIMING: + *out_reg = mmMC_SEQ_CAS_TIMING_LP; + break; + + case mmMC_SEQ_MISC_TIMING: + *out_reg = mmMC_SEQ_MISC_TIMING_LP; + break; + + case mmMC_SEQ_MISC_TIMING2: + *out_reg = mmMC_SEQ_MISC_TIMING2_LP; + break; + + case mmMC_SEQ_PMG_DVS_CMD: + *out_reg = mmMC_SEQ_PMG_DVS_CMD_LP; + break; + + case mmMC_SEQ_PMG_DVS_CTL: + *out_reg = mmMC_SEQ_PMG_DVS_CTL_LP; + break; + + case mmMC_SEQ_RD_CTL_D0: + *out_reg = mmMC_SEQ_RD_CTL_D0_LP; + break; + + case mmMC_SEQ_RD_CTL_D1: + *out_reg = mmMC_SEQ_RD_CTL_D1_LP; + break; + + case mmMC_SEQ_WR_CTL_D0: + *out_reg = mmMC_SEQ_WR_CTL_D0_LP; + break; + + case mmMC_SEQ_WR_CTL_D1: + *out_reg = mmMC_SEQ_WR_CTL_D1_LP; + break; + + case mmMC_PMG_CMD_EMRS: + *out_reg = mmMC_SEQ_PMG_CMD_EMRS_LP; + break; + + case mmMC_PMG_CMD_MRS: + *out_reg = mmMC_SEQ_PMG_CMD_MRS_LP; + break; + + case mmMC_PMG_CMD_MRS1: + *out_reg = mmMC_SEQ_PMG_CMD_MRS1_LP; + break; + + case mmMC_SEQ_PMG_TIMING: + *out_reg = mmMC_SEQ_PMG_TIMING_LP; + break; + + case mmMC_PMG_CMD_MRS2: + *out_reg = mmMC_SEQ_PMG_CMD_MRS2_LP; + break; + + case mmMC_SEQ_WR_CTL_2: + *out_reg = mmMC_SEQ_WR_CTL_2_LP; + break; + + default: + result = false; + break; + } + + return result; +} + +static int tonga_set_s0_mc_reg_index(struct tonga_mc_reg_table *table) +{ + uint32_t i; + uint16_t address; + + for (i = 0; i < table->last; i++) { + table->mc_reg_address[i].s0 = + tonga_check_s0_mc_reg_index(table->mc_reg_address[i].s1, + &address) ? 
+ address : + table->mc_reg_address[i].s1; + } + return 0; +} + +static int tonga_copy_vbios_smc_reg_table(const pp_atomctrl_mc_reg_table *table, + struct tonga_mc_reg_table *ni_table) +{ + uint8_t i, j; + + PP_ASSERT_WITH_CODE((table->last <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + PP_ASSERT_WITH_CODE((table->num_entries <= MAX_AC_TIMING_ENTRIES), + "Invalid VramInfo table.", return -EINVAL); + + for (i = 0; i < table->last; i++) + ni_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; + + ni_table->last = table->last; + + for (i = 0; i < table->num_entries; i++) { + ni_table->mc_reg_table_entry[i].mclk_max = + table->mc_reg_table_entry[i].mclk_max; + for (j = 0; j < table->last; j++) { + ni_table->mc_reg_table_entry[i].mc_data[j] = + table->mc_reg_table_entry[i].mc_data[j]; + } + } + + ni_table->num_entries = table->num_entries; + + return 0; +} + +static int tonga_set_mc_special_registers(struct pp_hwmgr *hwmgr, + struct tonga_mc_reg_table *table) +{ + uint8_t i, j, k; + uint32_t temp_reg; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + for (i = 0, j = table->last; i < table->last; i++) { + PP_ASSERT_WITH_CODE((j < SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + + switch (table->mc_reg_address[i].s1) { + + case mmMC_SEQ_MISC1: + temp_reg = cgs_read_register(hwmgr->device, + mmMC_PMG_CMD_EMRS); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_EMRS; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_EMRS_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + ((temp_reg & 0xffff0000)) | + ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); + } + j++; + PP_ASSERT_WITH_CODE((j < SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | + (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + + if (!data->is_memory_gddr5) + table->mc_reg_table_entry[k].mc_data[j] |= 0x100; + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + + if (!data->is_memory_gddr5) { + table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; + table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD; + for (k = 0; k < table->num_entries; k++) + table->mc_reg_table_entry[k].mc_data[j] = + (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; + j++; + PP_ASSERT_WITH_CODE((j <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + } + + break; + + case mmMC_SEQ_RESERVE_M: + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS1; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS1_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | + (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU72_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -EINVAL); + break; + + default: + break; + } + + } + + table->last = j; + + return 0; +} + +static int tonga_set_valid_flag(struct tonga_mc_reg_table *table) +{ + uint8_t i, j; + + for (i = 0; i < table->last; i++) { + for (j 
= 1; j < table->num_entries; j++) { + if (table->mc_reg_table_entry[j-1].mc_data[i] != + table->mc_reg_table_entry[j].mc_data[i]) { + table->validflag |= (1<<i); + break; + } + } + } + + return 0; +} + +static int tonga_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); + pp_atomctrl_mc_reg_table *table; + struct tonga_mc_reg_table *ni_table = &smu_data->mc_reg_table; + uint8_t module_index = tonga_get_memory_modile_index(hwmgr); + + table = kzalloc(sizeof(pp_atomctrl_mc_reg_table), GFP_KERNEL); + + if (table == NULL) + return -ENOMEM; + + /* Program additional LP registers that are no longer programmed by VBIOS */ + cgs_write_register(hwmgr->device, mmMC_SEQ_RAS_TIMING_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_RAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_CAS_TIMING_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_CAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_DLL_STBY_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_DLL_STBY)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL)); + cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_EMRS_LP, + cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS_LP, + cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS1_LP, + cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_TIMING_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS2_LP, + cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS2)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP, + cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2)); + + memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table)); + + result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table); + + if (!result) + result = tonga_copy_vbios_smc_reg_table(table, ni_table); + + if (!result) { + tonga_set_s0_mc_reg_index(ni_table); + result = tonga_set_mc_special_registers(hwmgr, ni_table); + } + + if (!result) + tonga_set_valid_flag(ni_table); + + kfree(table); + + return result; +} + +static bool tonga_is_dpm_running(struct pp_hwmgr *hwmgr) +{ 
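	/*
	 * DPM is treated as running when the SMC firmware reports its
	 * voltage controller as enabled in the FEATURE_STATUS register.
	 */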
+ return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) + ? true : false; +} + +static int tonga_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, + struct amd_pp_profile *request) +{ + struct tonga_smumgr *smu_data = (struct tonga_smumgr *) + (hwmgr->smu_backend); + struct SMU72_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU72_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU72_Discrete_GraphicsLevel) * + SMU72_MAX_LEVELS_GRAPHICS; + uint32_t i; + + for (i = 0; i < smu_data->smc_state_table.GraphicsDpmLevelCount; i++) { + levels[i].ActivityLevel = + cpu_to_be16(request->activity_threshold); + levels[i].EnabledForActivity = 1; + levels[i].UpHyst = request->up_hyst; + levels[i].DownHyst = request->down_hyst; + } + + return smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + array_size, SMC_RAM_END); +} + const struct pp_smumgr_func tonga_smu_funcs = { .smu_init = &tonga_smu_init, .smu_fini = &smu7_smu_fini, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h index 8c4f761d5bc8..5d70a00348e2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.h @@ -25,8 +25,26 @@ #define _TONGA_SMUMGR_H_ #include "smu72_discrete.h" - #include "smu7_smumgr.h" +#include "smu72.h" + + +#define ASICID_IS_TONGA_P(wDID, bRID) \ + (((wDID == 0x6930) && ((bRID == 0xF0) || (bRID == 0xF1) || (bRID == 0xFF))) \ + || ((wDID == 0x6920) && ((bRID == 0) || (bRID == 1)))) + +struct tonga_pt_defaults { + uint8_t svi_load_line_en; + uint8_t svi_load_line_vddC; + uint8_t tdc_vddc_throttle_release_limit_perc; + uint8_t tdc_mawt; + uint8_t tdc_waterfall_ctl; + uint8_t dte_ambient_temp_base; + uint32_t display_cac; + uint32_t bapm_temp_gradient; + uint16_t bapmti_r[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS]; + uint16_t bapmti_rc[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS]; +}; struct tonga_mc_reg_entry { uint32_t mclk_max; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h index 8bd38102b58e..283a0dc25e84 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _GPU_SCHED_TRACE_H_ diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 08e1332d814a..92ec663fdada 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -133,6 +133,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; + spin_lock_init(&entity->rq_lock); spin_lock_init(&entity->queue_lock); r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL); if (r) @@ -187,7 +188,7 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) if (kfifo_is_empty(&entity->job_queue)) return false; - if (ACCESS_ONCE(entity->dependency)) + if (READ_ONCE(entity->dependency)) return false; return true; @@ -204,7 +205,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { - 
struct amd_sched_rq *rq = entity->rq; int r; if (!amd_sched_entity_is_initialized(sched, entity)) @@ -218,7 +218,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, else r = wait_event_killable(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - amd_sched_rq_remove_entity(rq, entity); + amd_sched_entity_set_rq(entity, NULL); if (r) { struct amd_sched_job *job; @@ -257,6 +257,24 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, + struct amd_sched_rq *rq) +{ + if (entity->rq == rq) + return; + + spin_lock(&entity->rq_lock); + + if (entity->rq) + amd_sched_rq_remove_entity(entity->rq, entity); + + entity->rq = rq; + if (rq) + amd_sched_rq_add_entity(rq, entity); + + spin_unlock(&entity->rq_lock); +} + bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity) { @@ -354,7 +372,9 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) /* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ + spin_lock(&entity->rq_lock); amd_sched_rq_add_entity(entity->rq, entity); + spin_unlock(&entity->rq_lock); amd_sched_wakeup(sched); } return added; @@ -386,6 +406,7 @@ static void amd_sched_job_finish(struct work_struct *work) schedule_delayed_work(&next->work_tdr, sched->timeout); } spin_unlock(&sched->job_list_lock); + dma_fence_put(&s_job->s_fence->finished); sched->ops->free_job(s_job); } @@ -566,6 +587,7 @@ static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) container_of(cb, struct amd_sched_fence, cb); struct amd_gpu_scheduler *sched = s_fence->sched; + dma_fence_get(&s_fence->finished); atomic_dec(&sched->hw_rq_count); amd_sched_fence_finished(s_fence); @@ -618,9 +640,6 @@ static int amd_sched_main(void *param) fence = sched->ops->run_job(sched_job); amd_sched_fence_scheduled(s_fence); - /* amd_sched_process_job drops the job's reference of the fence. 
*/ - sched_job->s_fence = NULL; - if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index f9d8f28efd16..52c8e5447624 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -39,6 +39,7 @@ struct amd_sched_rq; struct amd_sched_entity { struct list_head list; struct amd_sched_rq *rq; + spinlock_t rq_lock; struct amd_gpu_scheduler *sched; spinlock_t queue_lock; @@ -115,9 +116,14 @@ struct amd_sched_backend_ops { enum amd_sched_priority { AMD_SCHED_PRIORITY_MIN, - AMD_SCHED_PRIORITY_NORMAL = AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, + AMD_SCHED_PRIORITY_NORMAL, + AMD_SCHED_PRIORITY_HIGH_SW, + AMD_SCHED_PRIORITY_HIGH_HW, AMD_SCHED_PRIORITY_KERNEL, - AMD_SCHED_PRIORITY_MAX + AMD_SCHED_PRIORITY_MAX, + AMD_SCHED_PRIORITY_INVALID = -1, + AMD_SCHED_PRIORITY_UNSET = -2 }; /** @@ -150,6 +156,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); void amd_sched_entity_push_job(struct amd_sched_job *sched_job); +void amd_sched_entity_set_rq(struct amd_sched_entity *entity, + struct amd_sched_rq *rq); int amd_sched_fence_slab_init(void); void amd_sched_fence_slab_fini(void); @@ -167,4 +175,11 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity); void amd_sched_job_kickout(struct amd_sched_job *s_job); + +static inline enum amd_sched_priority +amd_sched_get_job_priority(struct amd_sched_job *job) +{ + return (job->s_entity->rq - job->sched->sched_rq); +} + #endif diff --git a/drivers/gpu/drm/arm/hdlcd_drv.h b/drivers/gpu/drm/arm/hdlcd_drv.h index e3950a071152..56f34dfff640 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.h +++ b/drivers/gpu/drm/arm/hdlcd_drv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * ARM HDLCD Controller register definition */ diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile index a18f156c8b66..ecf25cf9f9f5 100644 --- a/drivers/gpu/drm/armada/Makefile +++ b/drivers/gpu/drm/armada/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 armada-y := armada_crtc.o armada_drv.o armada_fb.o armada_fbdev.o \ armada_gem.o armada_overlay.o armada_trace.o armada-y += armada_510.o diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 2a4d163ac76f..2e065facdce7 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -298,7 +298,7 @@ static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc, if (force) { /* Display is disabled, so just drop the old fb */ - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); return; } @@ -321,7 +321,7 @@ static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc, * the best. The worst that will happen is the buffer gets * reused before it has finished being displayed. 
*/ - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); } static void armada_drm_vblank_off(struct armada_crtc *dcrtc) @@ -577,7 +577,7 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, unsigned i; bool interlaced; - drm_framebuffer_reference(crtc->primary->fb); + drm_framebuffer_get(crtc->primary->fb); interlaced = !!(adj->flags & DRM_MODE_FLAG_INTERLACE); @@ -718,7 +718,7 @@ static int armada_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, MAX_SCHEDULE_TIMEOUT); /* Take a reference to the new fb as we're using it */ - drm_framebuffer_reference(crtc->primary->fb); + drm_framebuffer_get(crtc->primary->fb); /* Update the base in the CRTC */ armada_drm_crtc_update_regs(dcrtc, regs); @@ -742,7 +742,7 @@ void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, * primary plane. */ if (plane->fb) - drm_framebuffer_unreference(plane->fb); + drm_framebuffer_put(plane->fb); /* Power down the Y/U/V FIFOs */ sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; @@ -947,13 +947,13 @@ static int armada_drm_crtc_cursor_set(struct drm_crtc *crtc, /* Must be a kernel-mapped object */ if (!obj->addr) { - drm_gem_object_unreference_unlocked(&obj->obj); + drm_gem_object_put_unlocked(&obj->obj); return -EINVAL; } if (obj->obj.size < w * h * 4) { DRM_ERROR("buffer is too small\n"); - drm_gem_object_unreference_unlocked(&obj->obj); + drm_gem_object_put_unlocked(&obj->obj); return -ENOMEM; } } @@ -961,7 +961,7 @@ static int armada_drm_crtc_cursor_set(struct drm_crtc *crtc, if (dcrtc->cursor_obj) { dcrtc->cursor_obj->update = NULL; dcrtc->cursor_obj->update_data = NULL; - drm_gem_object_unreference_unlocked(&dcrtc->cursor_obj->obj); + drm_gem_object_put_unlocked(&dcrtc->cursor_obj->obj); } dcrtc->cursor_obj = obj; dcrtc->cursor_w = w; @@ -997,7 +997,7 @@ static void armada_drm_crtc_destroy(struct drm_crtc *crtc) struct armada_private *priv = crtc->dev->dev_private; if (dcrtc->cursor_obj) - drm_gem_object_unreference_unlocked(&dcrtc->cursor_obj->obj); + drm_gem_object_put_unlocked(&dcrtc->cursor_obj->obj); priv->dcrtc[dcrtc->num] = NULL; drm_crtc_cleanup(&dcrtc->crtc); @@ -1045,12 +1045,12 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc, * Ensure that we hold a reference on the new framebuffer. * This has to match the behaviour in mode_set. 
*/ - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); ret = armada_drm_crtc_queue_frame_work(dcrtc, work); if (ret) { /* Undo our reference above */ - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); kfree(work); return ret; } diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 2d45103d06cb..e857b88a9799 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -25,7 +25,7 @@ static void armada_drm_unref_work(struct work_struct *work) struct drm_framebuffer *fb; while (kfifo_get(&priv->fb_unref, &fb)) - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); } /* Must be called with dev->event_lock held */ diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c index b9e1637cc4cf..a38d5a0892a9 100644 --- a/drivers/gpu/drm/armada/armada_fb.c +++ b/drivers/gpu/drm/armada/armada_fb.c @@ -17,7 +17,7 @@ static void armada_fb_destroy(struct drm_framebuffer *fb) struct armada_framebuffer *dfb = drm_fb_to_armada_fb(fb); drm_framebuffer_cleanup(&dfb->fb); - drm_gem_object_unreference_unlocked(&dfb->obj->obj); + drm_gem_object_put_unlocked(&dfb->obj->obj); kfree(dfb); } @@ -94,7 +94,7 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, * the above call, but the caller will drop their reference * to it. Hence we need to take our own reference. */ - drm_gem_object_reference(&obj->obj); + drm_gem_object_get(&obj->obj); return dfb; } @@ -143,12 +143,12 @@ static struct drm_framebuffer *armada_fb_create(struct drm_device *dev, goto err; } - drm_gem_object_unreference_unlocked(&obj->obj); + drm_gem_object_put_unlocked(&obj->obj); return &dfb->fb; err_unref: - drm_gem_object_unreference_unlocked(&obj->obj); + drm_gem_object_put_unlocked(&obj->obj); err: DRM_ERROR("failed to initialize framebuffer: %d\n", ret); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c index 10e3fd87a83b..a2ce83f84800 100644 --- a/drivers/gpu/drm/armada/armada_fbdev.c +++ b/drivers/gpu/drm/armada/armada_fbdev.c @@ -51,13 +51,13 @@ static int armada_fb_create(struct drm_fb_helper *fbh, ret = armada_gem_linear_back(dev, obj); if (ret) { - drm_gem_object_unreference_unlocked(&obj->obj); + drm_gem_object_put_unlocked(&obj->obj); return ret; } ptr = armada_gem_map_object(dev, obj); if (!ptr) { - drm_gem_object_unreference_unlocked(&obj->obj); + drm_gem_object_put_unlocked(&obj->obj); return -ENOMEM; } @@ -67,7 +67,7 @@ static int armada_fb_create(struct drm_fb_helper *fbh, * A reference is now held by the framebuffer object if * successful, otherwise this drops the ref for the error path. 
*/ - drm_gem_object_unreference_unlocked(&obj->obj); + drm_gem_object_put_unlocked(&obj->obj); if (IS_ERR(dfb)) return PTR_ERR(dfb); diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c index 7837e6adb16f..a97f509743a5 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c @@ -265,7 +265,7 @@ int armada_gem_dumb_create(struct drm_file *file, struct drm_device *dev, /* drop reference from allocate - handle holds it now */ DRM_DEBUG_DRIVER("obj %p size %zu handle %#x\n", dobj, size, handle); err: - drm_gem_object_unreference_unlocked(&dobj->obj); + drm_gem_object_put_unlocked(&dobj->obj); return ret; } @@ -297,7 +297,7 @@ int armada_gem_create_ioctl(struct drm_device *dev, void *data, /* drop reference from allocate - handle holds it now */ DRM_DEBUG_DRIVER("obj %p size %zu handle %#x\n", dobj, size, handle); err: - drm_gem_object_unreference_unlocked(&dobj->obj); + drm_gem_object_put_unlocked(&dobj->obj); return ret; } @@ -314,13 +314,13 @@ int armada_gem_mmap_ioctl(struct drm_device *dev, void *data, return -ENOENT; if (!dobj->obj.filp) { - drm_gem_object_unreference_unlocked(&dobj->obj); + drm_gem_object_put_unlocked(&dobj->obj); return -EINVAL; } addr = vm_mmap(dobj->obj.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); - drm_gem_object_unreference_unlocked(&dobj->obj); + drm_gem_object_put_unlocked(&dobj->obj); if (IS_ERR_VALUE(addr)) return addr; @@ -375,7 +375,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data, } unref: - drm_gem_object_unreference_unlocked(&dobj->obj); + drm_gem_object_put_unlocked(&dobj->obj); return ret; } @@ -524,7 +524,7 @@ armada_gem_prime_import(struct drm_device *dev, struct dma_buf *buf) * Importing our own dmabuf(s) increases the * refcount on the gem object itself. */ - drm_gem_object_reference(obj); + drm_gem_object_get(obj); return obj; } } diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index edc44910d79f..b411b608821a 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -177,7 +177,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, * Take a reference on the new framebuffer - we want to * hold on to it while the hardware is displaying it. 
*/ - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); if (plane->fb) armada_ovl_retire_fb(dplane, plane->fb); @@ -278,7 +278,7 @@ static int armada_ovl_plane_disable(struct drm_plane *plane, fb = xchg(&dplane->old_fb, NULL); if (fb) - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); return 0; } diff --git a/drivers/gpu/drm/armada/armada_trace.c b/drivers/gpu/drm/armada/armada_trace.c index 068b336ba75f..c64cce325cdf 100644 --- a/drivers/gpu/drm/armada/armada_trace.c +++ b/drivers/gpu/drm/armada/armada_trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #ifndef __CHECKER__ #define CREATE_TRACE_POINTS #include "armada_trace.h" diff --git a/drivers/gpu/drm/armada/armada_trace.h b/drivers/gpu/drm/armada/armada_trace.h index be245a24610f..8dbfea7a00fe 100644 --- a/drivers/gpu/drm/armada/armada_trace.h +++ b/drivers/gpu/drm/armada/armada_trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(ARMADA_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define ARMADA_TRACE_H diff --git a/drivers/gpu/drm/ast/ast_dp501.c b/drivers/gpu/drm/ast/ast_dp501.c index 749646ae365f..4c7375b45281 100644 --- a/drivers/gpu/drm/ast/ast_dp501.c +++ b/drivers/gpu/drm/ast/ast_dp501.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/firmware.h> #include <drm/drmP.h> diff --git a/drivers/gpu/drm/ast/ast_dram_tables.h b/drivers/gpu/drm/ast/ast_dram_tables.h index 1d9c4e75d303..1e9ac9d6d26c 100644 --- a/drivers/gpu/drm/ast/ast_dram_tables.h +++ b/drivers/gpu/drm/ast/ast_dram_tables.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef AST_DRAM_TABLES_H #define AST_DRAM_TABLES_H diff --git a/drivers/gpu/drm/atmel-hlcdc/Makefile b/drivers/gpu/drm/atmel-hlcdc/Makefile index bb5f8507a8ce..49dc89f36b73 100644 --- a/drivers/gpu/drm/atmel-hlcdc/Makefile +++ b/drivers/gpu/drm/atmel-hlcdc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 atmel-hlcdc-dc-y := atmel_hlcdc_crtc.o \ atmel_hlcdc_dc.o \ atmel_hlcdc_output.o \ diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h index 76c490c3cdbc..375bf92cd04f 100644 --- a/drivers/gpu/drm/bochs/bochs.h +++ b/drivers/gpu/drm/bochs/bochs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #include <linux/io.h> #include <linux/console.h> diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index e3d5eb031f18..373eb28f31ed 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DRM_ANALOGIX_ANX78XX) += analogix-anx78xx.o obj-$(CONFIG_DRM_DUMB_VGA_DAC) += dumb-vga-dac.o obj-$(CONFIG_DRM_LVDS_ENCODER) += lvds-encoder.o diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 31ca883bda83..0e14f1572d05 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -607,10 +607,10 @@ static int adv7511_get_modes(struct adv7511 *adv7511, adv7511_set_config_csc(adv7511, connector, adv7511->rgb, drm_detect_hdmi_monitor(edid)); - kfree(edid); - cec_s_phys_addr_from_edid(adv7511->cec_adap, edid); + kfree(edid); + return count; } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h index fd1f745c6073..63b5756f463b 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef DW_HDMI_AUDIO_H #define 
DW_HDMI_AUDIO_H diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 562494873ca5..c2da5585e201 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1812,7 +1812,7 @@ int drm_atomic_debugfs_init(struct drm_minor *minor) */ static struct drm_pending_vblank_event *create_vblank_event( - struct drm_device *dev, uint64_t user_data) + struct drm_crtc *crtc, uint64_t user_data) { struct drm_pending_vblank_event *e = NULL; @@ -1822,7 +1822,8 @@ static struct drm_pending_vblank_event *create_vblank_event( e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); - e->event.user_data = user_data; + e->event.vbl.crtc_id = crtc->base.id; + e->event.vbl.user_data = user_data; return e; } @@ -2076,7 +2077,7 @@ static int prepare_crtc_signaling(struct drm_device *dev, if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT || fence_ptr) { struct drm_pending_vblank_event *e; - e = create_vblank_event(dev, arg->user_data); + e = create_vblank_event(crtc, arg->user_data); if (!e) return -ENOMEM; diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ae56d91433ff..71d712f1b56a 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -860,6 +860,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) for_each_oldnew_crtc_in_state(old_state, crtc, old_crtc_state, new_crtc_state, i) { const struct drm_crtc_helper_funcs *funcs; + int ret; /* Shut down everything that needs a full modeset. */ if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) @@ -883,6 +884,14 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) funcs->disable(crtc); else funcs->dpms(crtc, DRM_MODE_DPMS_OFF); + + if (!(dev->irq_enabled && dev->num_crtcs)) + continue; + + ret = drm_crtc_vblank_get(crtc); + WARN_ONCE(ret != -EINVAL, "driver forgot to call drm_crtc_vblank_off()\n"); + if (ret == 0) + drm_crtc_vblank_put(crtc); } } @@ -1772,16 +1781,16 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, } for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) { - /* commit tracked through new_crtc_state->commit, no need to do it explicitly */ - if (new_conn_state->crtc) - continue; - /* Userspace is not allowed to get ahead of the previous * commit with nonblocking ones. */ if (nonblock && old_conn_state->commit && !try_wait_for_completion(&old_conn_state->commit->flip_done)) return -EBUSY; + /* commit tracked through new_crtc_state->commit, no need to do it explicitly */ + if (new_conn_state->crtc) + continue; + commit = crtc_or_fake_commit(state, old_conn_state->crtc); if (!commit) return -ENOMEM; @@ -1790,18 +1799,17 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, } for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { - /* - * Unlike connectors, always track planes explicitly for - * async pageflip support. - */ - /* Userspace is not allowed to get ahead of the previous * commit with nonblocking ones. */ if (nonblock && old_plane_state->commit && !try_wait_for_completion(&old_plane_state->commit->flip_done)) return -EBUSY; - commit = crtc_or_fake_commit(state, old_plane_state->crtc); + /* + * Unlike connectors, always track planes explicitly for + * async pageflip support. 
+ */ + commit = crtc_or_fake_commit(state, new_plane_state->crtc ?: old_plane_state->crtc); if (!commit) return -ENOMEM; diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 7ff697389d74..aad468d170a7 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -31,6 +31,7 @@ #include <drm/drmP.h> #include "drm_internal.h" #include "drm_legacy.h" +#include <drm/drm_lease.h> /** * DOC: master and authentication @@ -93,7 +94,7 @@ int drm_authmagic(struct drm_device *dev, void *data, return file ? 0 : -EINVAL; } -static struct drm_master *drm_master_create(struct drm_device *dev) +struct drm_master *drm_master_create(struct drm_device *dev) { struct drm_master *master; @@ -107,6 +108,14 @@ static struct drm_master *drm_master_create(struct drm_device *dev) idr_init(&master->magic_map); master->dev = dev; + /* initialize the tree of output resource lessees */ + master->lessor = NULL; + master->lessee_id = 0; + INIT_LIST_HEAD(&master->lessees); + INIT_LIST_HEAD(&master->lessee_list); + idr_init(&master->leases); + idr_init(&master->lessee_idr); + return master; } @@ -189,6 +198,12 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, goto out_unlock; } + if (file_priv->master->lessor != NULL) { + DRM_DEBUG_LEASE("Attempt to set lessee %d as master\n", file_priv->master->lessee_id); + ret = -EINVAL; + goto out_unlock; + } + ret = drm_set_master(dev, file_priv, false); out_unlock: mutex_unlock(&dev->master_mutex); @@ -270,6 +285,13 @@ void drm_master_release(struct drm_file *file_priv) if (dev->master == file_priv->master) drm_drop_master(dev, file_priv); out: + if (drm_core_check_feature(dev, DRIVER_MODESET) && file_priv->is_master) { + /* Revoke any leases held by this or lessees, but only if + * this is the "real" master + */ + drm_lease_revoke(master); + } + /* drop the master reference held by the file priv */ if (file_priv->master) drm_master_put(&file_priv->master); @@ -288,7 +310,7 @@ out: */ bool drm_is_current_master(struct drm_file *fpriv) { - return fpriv->is_master && fpriv->master == fpriv->minor->dev->master; + return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; } EXPORT_SYMBOL(drm_is_current_master); @@ -310,12 +332,18 @@ static void drm_master_destroy(struct kref *kref) struct drm_master *master = container_of(kref, struct drm_master, refcount); struct drm_device *dev = master->dev; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + drm_lease_destroy(master); + if (dev->driver->master_destroy) dev->driver->master_destroy(dev, master); drm_legacy_master_rmmaps(dev, master); idr_destroy(&master->magic_map); + idr_destroy(&master->leases); + idr_destroy(&master->lessee_idr); + kfree(master->unique); kfree(master); } diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c index d34e5096887a..053044201e31 100644 --- a/drivers/gpu/drm/drm_dp_aux_dev.c +++ b/drivers/gpu/drm/drm_dp_aux_dev.c @@ -263,12 +263,6 @@ static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_aux(struct drm_dp_aux *aux) return aux_dev; } -static int auxdev_wait_atomic_t(atomic_t *p) -{ - schedule(); - return 0; -} - void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux) { struct drm_dp_aux_dev *aux_dev; @@ -283,7 +277,7 @@ void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux) mutex_unlock(&aux_idr_mutex); atomic_dec(&aux_dev->usecount); - wait_on_atomic_t(&aux_dev->usecount, auxdev_wait_atomic_t, + wait_on_atomic_t(&aux_dev->usecount, atomic_t_wait, TASK_UNINTERRUPTIBLE); minor = aux_dev->index; diff 
--git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c index 0ef9011a1856..02a50929af67 100644 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c @@ -410,6 +410,7 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter, { u8 data; int ret = 0; + int retry; if (!mode) { DRM_ERROR("NULL input\n"); @@ -417,10 +418,19 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter, } /* Read Status: i2c over aux */ - ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_LSPCON_CURRENT_MODE, - &data, sizeof(data)); + for (retry = 0; retry < 6; retry++) { + if (retry) + usleep_range(500, 1000); + + ret = drm_dp_dual_mode_read(adapter, + DP_DUAL_MODE_LSPCON_CURRENT_MODE, + &data, sizeof(data)); + if (!ret) + break; + } + if (ret < 0) { - DRM_ERROR("LSPCON read(0x80, 0x41) failed\n"); + DRM_DEBUG_KMS("LSPCON read(0x80, 0x41) failed\n"); return -EFAULT; } diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index c0292e5d7281..a934fd5e7e55 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -57,7 +57,8 @@ MODULE_PARM_DESC(debug, "Enable debug output, where each bit enables a debug cat "\t\tBit 2 (0x04) will enable KMS messages (modesetting code)\n" "\t\tBit 3 (0x08) will enable PRIME messages (prime code)\n" "\t\tBit 4 (0x10) will enable ATOMIC messages (atomic code)\n" -"\t\tBit 5 (0x20) will enable VBL messages (vblank code)"); +"\t\tBit 5 (0x20) will enable VBL messages (vblank code)\n" +"\t\tBit 7 (0x80) will enable LEASE messages (leasing code)"); module_param_named(debug, drm_debug, int, 0600); static DEFINE_SPINLOCK(drm_minor_lock); diff --git a/drivers/gpu/drm/drm_encoder.c b/drivers/gpu/drm/drm_encoder.c index 43f644844b83..59e0ebe733f8 100644 --- a/drivers/gpu/drm/drm_encoder.c +++ b/drivers/gpu/drm/drm_encoder.c @@ -226,7 +226,7 @@ int drm_mode_getencoder(struct drm_device *dev, void *data, drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); crtc = drm_encoder_get_crtc(encoder); - if (crtc) + if (crtc && drm_lease_held(file_priv, crtc->base.id)) enc_resp->crtc_id = crtc->base.id; else enc_resp->crtc_id = 0; @@ -234,7 +234,8 @@ int drm_mode_getencoder(struct drm_device *dev, void *data, enc_resp->encoder_type = encoder->encoder_type; enc_resp->encoder_id = encoder->base.id; - enc_resp->possible_crtcs = encoder->possible_crtcs; + enc_resp->possible_crtcs = drm_lease_filter_crtcs(file_priv, + encoder->possible_crtcs); enc_resp->possible_clones = encoder->possible_clones; return 0; diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 2affe53f3fda..279c1035c12d 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -681,6 +681,7 @@ EXPORT_SYMBOL(drm_framebuffer_init); /** * drm_framebuffer_lookup - look up a drm framebuffer and grab a reference * @dev: drm device + * @file_priv: drm file to check for lease against. 
* @id: id of the fb object * * If successful, this grabs an additional reference to the framebuffer - diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index 373e33f22be4..020e7668dfab 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -112,7 +112,7 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr, GFP_KERNEL | __GFP_NOWARN); if (!cma_obj->vaddr) { - dev_err(drm->dev, "failed to allocate buffer with size %zu\n", + dev_dbg(drm->dev, "failed to allocate buffer with size %zu\n", size); ret = -ENOMEM; goto error; diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index edd921adcf33..c9d5a6cd4d41 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -70,6 +70,12 @@ int drm_legacy_modeset_ctl_ioctl(struct drm_device *dev, void *data, int drm_legacy_irq_control(struct drm_device *dev, void *data, struct drm_file *file_priv); +int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +int drm_crtc_queue_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + /* drm_auth.c */ int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index a78f03155466..4aafe4802099 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -663,6 +663,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c new file mode 100644 index 000000000000..d1eb56a1eff4 --- /dev/null +++ b/drivers/gpu/drm/drm_lease.c @@ -0,0 +1,767 @@ +/* + * Copyright © 2017 Keith Packard <keithp@keithp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <drm/drmP.h> +#include "drm_internal.h" +#include "drm_legacy.h" +#include "drm_crtc_internal.h" +#include <drm/drm_lease.h> +#include <drm/drm_auth.h> +#include <drm/drm_crtc_helper.h> + +#define drm_for_each_lessee(lessee, lessor) \ + list_for_each_entry((lessee), &(lessor)->lessees, lessee_list) + +static uint64_t drm_lease_idr_object; + +/** + * drm_lease_owner - return ancestor owner drm_master + * @master: drm_master somewhere within tree of lessees and lessors + * + * RETURN: + * + * drm_master at the top of the tree (i.e, with lessor NULL + */ +struct drm_master *drm_lease_owner(struct drm_master *master) +{ + while (master->lessor != NULL) + master = master->lessor; + return master; +} +EXPORT_SYMBOL(drm_lease_owner); + +/** + * _drm_find_lessee - find lessee by id (idr_mutex held) + * @master: drm_master of lessor + * @id: lessee_id + * + * RETURN: + * + * drm_master of the lessee if valid, NULL otherwise + */ + +static struct drm_master* +_drm_find_lessee(struct drm_master *master, int lessee_id) +{ + lockdep_assert_held(&master->dev->mode_config.idr_mutex); + return idr_find(&drm_lease_owner(master)->lessee_idr, lessee_id); +} + +/** + * _drm_lease_held_master - check to see if an object is leased (or owned) by master (idr_mutex held) + * @master: the master to check the lease status of + * @id: the id to check + * + * Checks if the specified master holds a lease on the object. Return + * value: + * + * true 'master' holds a lease on (or owns) the object + * false 'master' does not hold a lease. + */ +static int _drm_lease_held_master(struct drm_master *master, int id) +{ + lockdep_assert_held(&master->dev->mode_config.idr_mutex); + if (master->lessor) + return idr_find(&master->leases, id) != NULL; + return true; +} + +/** + * _drm_has_leased - check to see if an object has been leased (idr_mutex held) + * @master: the master to check the lease status of + * @id: the id to check + * + * Checks if any lessee of 'master' holds a lease on 'id'. Return + * value: + * + * true Some lessee holds a lease on the object. + * false No lessee has a lease on the object. + */ +static bool _drm_has_leased(struct drm_master *master, int id) +{ + struct drm_master *lessee; + + lockdep_assert_held(&master->dev->mode_config.idr_mutex); + drm_for_each_lessee(lessee, master) + if (_drm_lease_held_master(lessee, id)) + return true; + return false; +} + +/** + * _drm_lease_held - check drm_mode_object lease status (idr_mutex held) + * @master: the drm_master + * @id: the object id + * + * Checks if the specified master holds a lease on the object. Return + * value: + * + * true 'master' holds a lease on (or owns) the object + * false 'master' does not hold a lease. + */ +bool _drm_lease_held(struct drm_file *file_priv, int id) +{ + if (file_priv == NULL || file_priv->master == NULL) + return true; + + return _drm_lease_held_master(file_priv->master, id); +} +EXPORT_SYMBOL(_drm_lease_held); + +/** + * drm_lease_held - check drm_mode_object lease status (idr_mutex not held) + * @master: the drm_master + * @id: the object id + * + * Checks if the specified master holds a lease on the object. Return + * value: + * + * true 'master' holds a lease on (or owns) the object + * false 'master' does not hold a lease. 
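+ *
+ * Illustrative use (editor's sketch, mirroring the callers added elsewhere
+ * in this patch): an ioctl handler only reports a CRTC id when the caller
+ * owns or leases it:
+ *
+ *	if (crtc && drm_lease_held(file_priv, crtc->base.id))
+ *		enc_resp->crtc_id = crtc->base.id;
+ *	else
+ *		enc_resp->crtc_id = 0;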
+ */ +bool drm_lease_held(struct drm_file *file_priv, int id) +{ + struct drm_master *master; + bool ret; + + if (file_priv == NULL || file_priv->master == NULL) + return true; + + master = file_priv->master; + mutex_lock(&master->dev->mode_config.idr_mutex); + ret = _drm_lease_held_master(master, id); + mutex_unlock(&master->dev->mode_config.idr_mutex); + return ret; +} +EXPORT_SYMBOL(drm_lease_held); + +/** + * drm_lease_filter_crtcs - restricted crtc set to leased values (idr_mutex not held) + * @file_priv: requestor file + * @crtcs: bitmask of crtcs to check + * + * Reconstructs a crtc mask based on the crtcs which are visible + * through the specified file. + */ +uint32_t drm_lease_filter_crtcs(struct drm_file *file_priv, uint32_t crtcs_in) +{ + struct drm_master *master; + struct drm_device *dev; + struct drm_crtc *crtc; + int count_in, count_out; + uint32_t crtcs_out = 0; + + if (file_priv == NULL || file_priv->master == NULL) + return crtcs_in; + + master = file_priv->master; + dev = master->dev; + + count_in = count_out = 0; + mutex_lock(&master->dev->mode_config.idr_mutex); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + if (_drm_lease_held_master(master, crtc->base.id)) { + uint32_t mask_in = 1ul << count_in; + if ((crtcs_in & mask_in) != 0) { + uint32_t mask_out = 1ul << count_out; + crtcs_out |= mask_out; + } + count_out++; + } + count_in++; + } + mutex_unlock(&master->dev->mode_config.idr_mutex); + return crtcs_out; +} +EXPORT_SYMBOL(drm_lease_filter_crtcs); + +/* + * drm_lease_create - create a new drm_master with leased objects (idr_mutex not held) + * @lessor: lease holder (or owner) of objects + * @leases: objects to lease to the new drm_master + * + * Uses drm_master_create to allocate a new drm_master, then checks to + * make sure all of the desired objects can be leased, atomically + * leasing them to the new drmmaster. 
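+ *
+ * On success the new drm_master is returned; on failure one of the
+ * following ERR_PTR values is returned: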
+ * + * ERR_PTR(-EACCESS) some other master holds the title to any object + * ERR_PTR(-ENOENT) some object is not a valid DRM object for this device + * ERR_PTR(-EBUSY) some other lessee holds title to this object + * ERR_PTR(-EEXIST) same object specified more than once in the provided list + * ERR_PTR(-ENOMEM) allocation failed + */ +static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr *leases) +{ + struct drm_device *dev = lessor->dev; + int error; + struct drm_master *lessee; + int object; + int id; + void *entry; + + DRM_DEBUG_LEASE("lessor %d\n", lessor->lessee_id); + + lessee = drm_master_create(lessor->dev); + if (!lessee) { + DRM_DEBUG_LEASE("drm_master_create failed\n"); + return ERR_PTR(-ENOMEM); + } + + mutex_lock(&dev->mode_config.idr_mutex); + + /* Insert the new lessee into the tree */ + id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); + if (id < 0) { + error = id; + goto out_lessee; + } + + lessee->lessee_id = id; + lessee->lessor = drm_master_get(lessor); + list_add_tail(&lessee->lessee_list, &lessor->lessees); + + idr_for_each_entry(leases, entry, object) { + error = 0; + if (!idr_find(&dev->mode_config.crtc_idr, object)) + error = -ENOENT; + else if (!_drm_lease_held_master(lessor, object)) + error = -EACCES; + else if (_drm_has_leased(lessor, object)) + error = -EBUSY; + + if (error != 0) { + DRM_DEBUG_LEASE("object %d failed %d\n", object, error); + goto out_lessee; + } + } + + /* Move the leases over */ + lessee->leases = *leases; + DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); + + mutex_unlock(&dev->mode_config.idr_mutex); + return lessee; + +out_lessee: + drm_master_put(&lessee); + + mutex_unlock(&dev->mode_config.idr_mutex); + + return ERR_PTR(error); +} + +/** + * drm_lease_destroy - a master is going away (idr_mutex not held) + * @master: the drm_master being destroyed + * + * All lessees will have been destroyed as they + * hold a reference on their lessor. Notify any + * lessor for this master so that it can check + * the list of lessees. + */ +void drm_lease_destroy(struct drm_master *master) +{ + struct drm_device *dev = master->dev; + + mutex_lock(&dev->mode_config.idr_mutex); + + DRM_DEBUG_LEASE("drm_lease_destroy %d\n", master->lessee_id); + + /* This master is referenced by all lessees, hence it cannot be destroyed + * until all of them have been + */ + WARN_ON(!list_empty(&master->lessees)); + + /* Remove this master from the lessee idr in the owner */ + if (master->lessee_id != 0) { + DRM_DEBUG_LEASE("remove master %d from device list of lessees\n", master->lessee_id); + idr_remove(&(drm_lease_owner(master)->lessee_idr), master->lessee_id); + } + + /* Remove this master from any lessee list it may be on */ + list_del(&master->lessee_list); + + mutex_unlock(&dev->mode_config.idr_mutex); + + if (master->lessor) { + /* Tell the master to check the lessee list */ + drm_sysfs_hotplug_event(dev); + drm_master_put(&master->lessor); + } + + DRM_DEBUG_LEASE("drm_lease_destroy done %d\n", master->lessee_id); +} + +/** + * _drm_lease_revoke - revoke access to all leased objects (idr_mutex held) + * @master: the master losing its lease + */ +static void _drm_lease_revoke(struct drm_master *top) +{ + int object; + void *entry; + struct drm_master *master = top; + + lockdep_assert_held(&top->dev->mode_config.idr_mutex); + + /* + * Walk the tree starting at 'top' emptying all leases. 
Because + * the tree is fully connected, we can do this without recursing + */ + for (;;) { + DRM_DEBUG_LEASE("revoke leases for %p %d\n", master, master->lessee_id); + + /* Evacuate the lease */ + idr_for_each_entry(&master->leases, entry, object) + idr_remove(&master->leases, object); + + /* Depth-first list walk */ + + /* Down */ + if (!list_empty(&master->lessees)) { + master = list_first_entry(&master->lessees, struct drm_master, lessee_list); + } else { + /* Up */ + while (master != top && master == list_last_entry(&master->lessor->lessees, struct drm_master, lessee_list)) + master = master->lessor; + + if (master == top) + break; + + /* Over */ + master = list_entry(master->lessee_list.next, struct drm_master, lessee_list); + } + } +} + +/** + * drm_lease_revoke - revoke access to all leased objects (idr_mutex not held) + * @top: the master losing its lease + */ +void drm_lease_revoke(struct drm_master *top) +{ + mutex_lock(&top->dev->mode_config.idr_mutex); + _drm_lease_revoke(top); + mutex_unlock(&top->dev->mode_config.idr_mutex); +} + +static int validate_lease(struct drm_device *dev, + struct drm_file *lessor_priv, + int object_count, + struct drm_mode_object **objects) +{ + int o; + int has_crtc = -1; + int has_connector = -1; + int has_plane = -1; + + /* we want to confirm that there is at least one crtc, plane + connector object. */ + + for (o = 0; o < object_count; o++) { + if (objects[o]->type == DRM_MODE_OBJECT_CRTC && has_crtc == -1) { + has_crtc = o; + } + if (objects[o]->type == DRM_MODE_OBJECT_CONNECTOR && has_connector == -1) + has_connector = o; + + if (lessor_priv->universal_planes) { + if (objects[o]->type == DRM_MODE_OBJECT_PLANE && has_plane == -1) + has_plane = o; + } + } + if (has_crtc == -1 || has_connector == -1) + return -EINVAL; + if (lessor_priv->universal_planes && has_plane == -1) + return -EINVAL; + return 0; +} + +static int fill_object_idr(struct drm_device *dev, + struct drm_file *lessor_priv, + struct idr *leases, + int object_count, + u32 *object_ids) +{ + struct drm_mode_object **objects; + u32 o; + int ret; + objects = kcalloc(object_count, sizeof(struct drm_mode_object *), + GFP_KERNEL); + if (!objects) + return -ENOMEM; + + /* step one - get references to all the mode objects + and check for validity. */ + for (o = 0; o < object_count; o++) { + if ((int) object_ids[o] < 0) { + ret = -EINVAL; + goto out_free_objects; + } + + objects[o] = drm_mode_object_find(dev, lessor_priv, + object_ids[o], + DRM_MODE_OBJECT_ANY); + if (!objects[o]) { + ret = -ENOENT; + goto out_free_objects; + } + + if (!drm_mode_object_lease_required(objects[o]->type)) { + ret = -EINVAL; + goto out_free_objects; + } + } + + ret = validate_lease(dev, lessor_priv, object_count, objects); + if (ret) + goto out_free_objects; + + /* add their IDs to the lease request - taking into account + universal planes */ + for (o = 0; o < object_count; o++) { + struct drm_mode_object *obj = objects[o]; + u32 object_id = objects[o]->id; + DRM_DEBUG_LEASE("Adding object %d to lease\n", object_id); + + /* + * We're using an IDR to hold the set of leased + * objects, but we don't need to point at the object's + * data structure from the lease as the main crtc_idr + * will be used to actually find that. Instead, all we + * really want is a 'leased/not-leased' result, for + * which any non-NULL pointer will work fine. 
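+	 *
+	 * (Editor's sketch) later checks then reduce to a simple presence
+	 * test, as in _drm_lease_held_master() above:
+	 *
+	 *	leased = idr_find(&master->leases, id) != NULL;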
+ */ + ret = idr_alloc(leases, &drm_lease_idr_object , object_id, object_id + 1, GFP_KERNEL); + if (ret < 0) { + DRM_DEBUG_LEASE("Object %d cannot be inserted into leases (%d)\n", + object_id, ret); + goto out_free_objects; + } + if (obj->type == DRM_MODE_OBJECT_CRTC && !lessor_priv->universal_planes) { + struct drm_crtc *crtc = obj_to_crtc(obj); + ret = idr_alloc(leases, &drm_lease_idr_object, crtc->primary->base.id, crtc->primary->base.id + 1, GFP_KERNEL); + if (ret < 0) { + DRM_DEBUG_LEASE("Object primary plane %d cannot be inserted into leases (%d)\n", + object_id, ret); + goto out_free_objects; + } + if (crtc->cursor) { + ret = idr_alloc(leases, &drm_lease_idr_object, crtc->cursor->base.id, crtc->cursor->base.id + 1, GFP_KERNEL); + if (ret < 0) { + DRM_DEBUG_LEASE("Object cursor plane %d cannot be inserted into leases (%d)\n", + object_id, ret); + goto out_free_objects; + } + } + } + } + + ret = 0; +out_free_objects: + for (o = 0; o < object_count; o++) { + if (objects[o]) + drm_mode_object_put(objects[o]); + } + kfree(objects); + return ret; +} + +/** + * drm_mode_create_lease_ioctl - create a new lease + * @dev: the drm device + * @data: pointer to struct drm_mode_create_lease + * @file_priv: the file being manipulated + * + * The master associated with the specified file will have a lease + * created containing the objects specified in the ioctl structure. + * A file descriptor will be allocated for that and returned to the + * application. + */ +int drm_mode_create_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessor_priv) +{ + struct drm_mode_create_lease *cl = data; + size_t object_count; + int ret = 0; + struct idr leases; + struct drm_master *lessor = lessor_priv->master; + struct drm_master *lessee = NULL; + struct file *lessee_file = NULL; + struct file *lessor_file = lessor_priv->filp; + struct drm_file *lessee_priv; + int fd = -1; + uint32_t *object_ids; + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + /* Do not allow sub-leases */ + if (lessor->lessor) + return -EINVAL; + + /* need some objects */ + if (cl->object_count == 0) + return -EINVAL; + + if (cl->flags && (cl->flags & ~(O_CLOEXEC | O_NONBLOCK))) + return -EINVAL; + + object_count = cl->object_count; + + object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), object_count * sizeof(__u32)); + if (IS_ERR(object_ids)) + return PTR_ERR(object_ids); + + idr_init(&leases); + + /* fill and validate the object idr */ + ret = fill_object_idr(dev, lessor_priv, &leases, + object_count, object_ids); + kfree(object_ids); + if (ret) { + idr_destroy(&leases); + return ret; + } + + /* Allocate a file descriptor for the lease */ + fd = get_unused_fd_flags(cl->flags & (O_CLOEXEC | O_NONBLOCK)); + if (fd < 0) { + idr_destroy(&leases); + return fd; + } + + DRM_DEBUG_LEASE("Creating lease\n"); + lessee = drm_lease_create(lessor, &leases); + + if (IS_ERR(lessee)) { + ret = PTR_ERR(lessee); + goto out_leases; + } + + /* Clone the lessor file to create a new file for us */ + DRM_DEBUG_LEASE("Allocating lease file\n"); + path_get(&lessor_file->f_path); + lessee_file = alloc_file(&lessor_file->f_path, + lessor_file->f_mode, + fops_get(lessor_file->f_inode->i_fop)); + + if (IS_ERR(lessee_file)) { + ret = PTR_ERR(lessee_file); + goto out_lessee; + } + + /* Initialize the new file for DRM */ + DRM_DEBUG_LEASE("Initializing the file with %p\n", lessee_file->f_op->open); + ret = lessee_file->f_op->open(lessee_file->f_inode, lessee_file); + if (ret) + goto 
out_lessee_file; + + lessee_priv = lessee_file->private_data; + + /* Change the file to a master one */ + drm_master_put(&lessee_priv->master); + lessee_priv->master = lessee; + lessee_priv->is_master = 1; + lessee_priv->authenticated = 1; + + /* Hook up the fd */ + fd_install(fd, lessee_file); + + /* Pass fd back to userspace */ + DRM_DEBUG_LEASE("Returning fd %d id %d\n", fd, lessee->lessee_id); + cl->fd = fd; + cl->lessee_id = lessee->lessee_id; + + DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n"); + return 0; + +out_lessee_file: + fput(lessee_file); + +out_lessee: + drm_master_put(&lessee); + +out_leases: + put_unused_fd(fd); + idr_destroy(&leases); + + DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret); + return ret; +} + +/** + * drm_mode_list_lessees_ioctl - list lessee ids + * @dev: the drm device + * @data: pointer to struct drm_mode_list_lessees + * @lessor_priv: the file being manipulated + * + * Starting from the master associated with the specified file, + * the master with the provided lessee_id is found, and then + * an array of lessee ids associated with leases from that master + * are returned. + */ + +int drm_mode_list_lessees_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessor_priv) +{ + struct drm_mode_list_lessees *arg = data; + __u32 __user *lessee_ids = (__u32 __user *) (uintptr_t) (arg->lessees_ptr); + __u32 count_lessees = arg->count_lessees; + struct drm_master *lessor = lessor_priv->master, *lessee; + int count; + int ret = 0; + + if (arg->pad) + return -EINVAL; + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + DRM_DEBUG_LEASE("List lessees for %d\n", lessor->lessee_id); + + mutex_lock(&dev->mode_config.idr_mutex); + + count = 0; + drm_for_each_lessee(lessee, lessor) { + /* Only list un-revoked leases */ + if (!idr_is_empty(&lessee->leases)) { + if (count_lessees > count) { + DRM_DEBUG_LEASE("Add lessee %d\n", lessee->lessee_id); + ret = put_user(lessee->lessee_id, lessee_ids + count); + if (ret) + break; + } + count++; + } + } + + DRM_DEBUG_LEASE("Lessor leases to %d\n", count); + if (ret == 0) + arg->count_lessees = count; + + mutex_unlock(&dev->mode_config.idr_mutex); + + return ret; +} + +/** + * drm_mode_get_lease_ioctl - list leased objects + * @dev: the drm device + * @data: pointer to struct drm_mode_get_lease + * @file_priv: the file being manipulated + * + * Return the list of leased objects for the specified lessee + */ + +int drm_mode_get_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessee_priv) +{ + struct drm_mode_get_lease *arg = data; + __u32 __user *object_ids = (__u32 __user *) (uintptr_t) (arg->objects_ptr); + __u32 count_objects = arg->count_objects; + struct drm_master *lessee = lessee_priv->master; + struct idr *object_idr; + int count; + void *entry; + int object; + int ret = 0; + + if (arg->pad) + return -EINVAL; + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + DRM_DEBUG_LEASE("get lease for %d\n", lessee->lessee_id); + + mutex_lock(&dev->mode_config.idr_mutex); + + if (lessee->lessor == NULL) + /* owner can use all objects */ + object_idr = &lessee->dev->mode_config.crtc_idr; + else + /* lessee can only use allowed object */ + object_idr = &lessee->leases; + + count = 0; + idr_for_each_entry(object_idr, entry, object) { + if (count_objects > count) { + DRM_DEBUG_LEASE("adding object %d\n", object); + ret = put_user(object, object_ids + count); + if (ret) 
+ break; + } + count++; + } + + DRM_DEBUG("lease holds %d objects\n", count); + if (ret == 0) + arg->count_objects = count; + + mutex_unlock(&dev->mode_config.idr_mutex); + + return ret; +} + +/** + * drm_mode_revoke_lease_ioctl - revoke lease + * @dev: the drm device + * @data: pointer to struct drm_mode_revoke_lease + * @file_priv: the file being manipulated + * + * This removes all of the objects from the lease without + * actually getting rid of the lease itself; that way all + * references to it still work correctly + */ +int drm_mode_revoke_lease_ioctl(struct drm_device *dev, + void *data, struct drm_file *lessor_priv) +{ + struct drm_mode_revoke_lease *arg = data; + struct drm_master *lessor = lessor_priv->master; + struct drm_master *lessee; + int ret = 0; + + DRM_DEBUG_LEASE("revoke lease for %d\n", arg->lessee_id); + + /* Can't lease without MODESET */ + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + mutex_lock(&dev->mode_config.idr_mutex); + + lessee = _drm_find_lessee(lessor, arg->lessee_id); + + /* No such lessee */ + if (!lessee) { + ret = -ENOENT; + goto fail; + } + + /* Lease is not held by lessor */ + if (lessee->lessor != lessor) { + ret = -EACCES; + goto fail; + } + + _drm_lease_revoke(lessee); + +fail: + mutex_unlock(&dev->mode_config.idr_mutex); + + return ret; +} diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index 74f6ff5df656..cda8bfab6d3b 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -122,10 +122,12 @@ int drm_mode_getresources(struct drm_device *dev, void *data, count = 0; crtc_id = u64_to_user_ptr(card_res->crtc_id_ptr); drm_for_each_crtc(crtc, dev) { - if (count < card_res->count_crtcs && - put_user(crtc->base.id, crtc_id + count)) - return -EFAULT; - count++; + if (drm_lease_held(file_priv, crtc->base.id)) { + if (count < card_res->count_crtcs && + put_user(crtc->base.id, crtc_id + count)) + return -EFAULT; + count++; + } } card_res->count_crtcs = count; @@ -143,12 +145,14 @@ int drm_mode_getresources(struct drm_device *dev, void *data, count = 0; connector_id = u64_to_user_ptr(card_res->connector_id_ptr); drm_for_each_connector_iter(connector, &conn_iter) { - if (count < card_res->count_connectors && - put_user(connector->base.id, connector_id + count)) { - drm_connector_list_iter_end(&conn_iter); - return -EFAULT; + if (drm_lease_held(file_priv, connector->base.id)) { + if (count < card_res->count_connectors && + put_user(connector->base.id, connector_id + count)) { + drm_connector_list_iter_end(&conn_iter); + return -EFAULT; + } + count++; } - count++; } card_res->count_connectors = count; drm_connector_list_iter_end(&conn_iter); @@ -385,7 +389,6 @@ void drm_mode_config_init(struct drm_device *dev) dev->mode_config.num_connector = 0; dev->mode_config.num_crtc = 0; dev->mode_config.num_encoder = 0; - dev->mode_config.num_overlay_plane = 0; dev->mode_config.num_total_plane = 0; } EXPORT_SYMBOL(drm_mode_config_init); diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index 240a05d91a53..ce4d2fb32810 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -104,6 +104,25 @@ void drm_mode_object_unregister(struct drm_device *dev, mutex_unlock(&dev->mode_config.idr_mutex); } +/** + * drm_lease_required - check types which must be leased to be used + * @type: type of object + * + * Returns whether the provided type of drm_mode_object must + * be owned or leased to be used by a process. 
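+ *
+ * Currently this covers CRTCs, connectors and planes; every other object
+ * type (framebuffers, properties, blobs, ...) stays visible without a
+ * lease, as the switch below shows.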
+ */ +bool drm_mode_object_lease_required(uint32_t type) +{ + switch(type) { + case DRM_MODE_OBJECT_CRTC: + case DRM_MODE_OBJECT_CONNECTOR: + case DRM_MODE_OBJECT_PLANE: + return true; + default: + return false; + } +} + struct drm_mode_object *__drm_mode_object_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id, uint32_t type) @@ -117,6 +136,10 @@ struct drm_mode_object *__drm_mode_object_find(struct drm_device *dev, if (obj && obj->id != id) obj = NULL; + if (obj && drm_mode_object_lease_required(obj->type) && + !_drm_lease_held(file_priv, obj->id)) + obj = NULL; + if (obj && obj->free_cb) { if (!kref_get_unless_zero(&obj->refcount)) obj = NULL; @@ -128,6 +151,7 @@ struct drm_mode_object *__drm_mode_object_find(struct drm_device *dev, /** * drm_mode_object_find - look up a drm object with static lifetime + * @dev: drm device * @file_priv: drm file * @id: id of the mode object * @type: type of the mode object diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index e123497da0ca..963e23db0fe7 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -93,7 +93,7 @@ void drm_modeset_lock_all(struct drm_device *dev) struct drm_modeset_acquire_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); if (WARN_ON(!ctx)) return; diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 6af02c7b5da3..19404e34cd59 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -241,8 +241,6 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, list_add_tail(&plane->head, &config->plane_list); plane->index = config->num_total_plane++; - if (plane->type == DRM_PLANE_TYPE_OVERLAY) - config->num_overlay_plane++; drm_object_attach_property(&plane->base, config->plane_type_property, @@ -353,8 +351,6 @@ void drm_plane_cleanup(struct drm_plane *plane) list_del(&plane->head); dev->mode_config.num_total_plane--; - if (plane->type == DRM_PLANE_TYPE_OVERLAY) - dev->mode_config.num_overlay_plane--; WARN_ON(plane->state && !plane->funcs->atomic_destroy_state); if (plane->state && plane->funcs->atomic_destroy_state) @@ -462,43 +458,35 @@ int drm_mode_getplane_res(struct drm_device *dev, void *data, struct drm_mode_config *config; struct drm_plane *plane; uint32_t __user *plane_ptr; - int copied = 0; - unsigned num_planes; + int count = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; config = &dev->mode_config; - - if (file_priv->universal_planes) - num_planes = config->num_total_plane; - else - num_planes = config->num_overlay_plane; + plane_ptr = u64_to_user_ptr(plane_resp->plane_id_ptr); /* * This ioctl is called twice, once to determine how much space is * needed, and the 2nd time to fill it. */ - if (num_planes && - (plane_resp->count_planes >= num_planes)) { - plane_ptr = (uint32_t __user *)(unsigned long)plane_resp->plane_id_ptr; - - /* Plane lists are invariant, no locking needed. */ - drm_for_each_plane(plane, dev) { - /* - * Unless userspace set the 'universal planes' - * capability bit, only advertise overlays. - */ - if (plane->type != DRM_PLANE_TYPE_OVERLAY && - !file_priv->universal_planes) - continue; - - if (put_user(plane->base.id, plane_ptr + copied)) + drm_for_each_plane(plane, dev) { + /* + * Unless userspace set the 'universal planes' + * capability bit, only advertise overlays. 
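+	 * (Userspace opts in with the DRM_CLIENT_CAP_UNIVERSAL_PLANES
+	 * client capability.)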
+ */ + if (plane->type != DRM_PLANE_TYPE_OVERLAY && + !file_priv->universal_planes) + continue; + + if (drm_lease_held(file_priv, plane->base.id)) { + if (count < plane_resp->count_planes && + put_user(plane->base.id, plane_ptr + count)) return -EFAULT; - copied++; + count++; } } - plane_resp->count_planes = num_planes; + plane_resp->count_planes = count; return 0; } @@ -518,9 +506,9 @@ int drm_mode_getplane(struct drm_device *dev, void *data, return -ENOENT; drm_modeset_lock(&plane->mutex, NULL); - if (plane->state && plane->state->crtc) + if (plane->state && plane->state->crtc && drm_lease_held(file_priv, plane->state->crtc->base.id)) plane_resp->crtc_id = plane->state->crtc->base.id; - else if (!plane->state && plane->crtc) + else if (!plane->state && plane->crtc && drm_lease_held(file_priv, plane->crtc->base.id)) plane_resp->crtc_id = plane->crtc->base.id; else plane_resp->crtc_id = 0; @@ -534,7 +522,9 @@ int drm_mode_getplane(struct drm_device *dev, void *data, drm_modeset_unlock(&plane->mutex); plane_resp->plane_id = plane->base.id; - plane_resp->possible_crtcs = plane->possible_crtcs; + plane_resp->possible_crtcs = drm_lease_filter_crtcs(file_priv, + plane->possible_crtcs); + plane_resp->gamma_size = 0; /* @@ -1039,7 +1029,7 @@ retry: } e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); - e->event.user_data = page_flip->user_data; + e->event.vbl.user_data = page_flip->user_data; ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h index 16c64d067e67..baccc63db106 100644 --- a/drivers/gpu/drm/drm_trace.h +++ b/drivers/gpu/drm/drm_trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_DRM_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _DRM_TRACE_H_ diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 3af6c20ba03b..09c1c4ff93ca 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -251,7 +251,7 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe, } DRM_DEBUG_VBL("updating vblank count on crtc %u:" - " current=%u, diff=%u, hw=%u hw_last=%u\n", + " current=%llu, diff=%u, hw=%u hw_last=%u\n", pipe, vblank->count, diff, cur_vblank, vblank->last); if (diff == 0) { @@ -299,8 +299,8 @@ u32 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc) u32 vblank; unsigned long flags; - WARN(!dev->driver->get_vblank_timestamp, - "This function requires support for accurate vblank timestamps."); + WARN_ONCE(drm_debug & DRM_UT_VBL && !dev->driver->get_vblank_timestamp, + "This function requires support for accurate vblank timestamps."); spin_lock_irqsave(&dev->vblank_time_lock, flags); @@ -740,17 +740,31 @@ drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, * Returns: * The software vblank counter. */ -u32 drm_crtc_vblank_count(struct drm_crtc *crtc) +u64 drm_crtc_vblank_count(struct drm_crtc *crtc) { return drm_vblank_count(crtc->dev, drm_crtc_index(crtc)); } EXPORT_SYMBOL(drm_crtc_vblank_count); -static u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe, +/** + * drm_vblank_count_and_time - retrieve "cooked" vblank counter value and the + * system timestamp corresponding to that vblank counter value. + * @dev: DRM device + * @pipe: index of CRTC whose counter to retrieve + * @vblanktime: Pointer to ktime_t to receive the vblank timestamp. 
+ * + * Fetches the "cooked" vblank count value that represents the number of + * vblank events since the system was booted, including lost events due to + * modesetting activity. Returns corresponding system timestamp of the time + * of the vblank interval that corresponds to the current vblank counter value. + * + * This is the legacy version of drm_crtc_vblank_count_and_time(). + */ +static u64 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe, ktime_t *vblanktime) { struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; - u32 vblank_count; + u64 vblank_count; unsigned int seq; if (WARN_ON(pipe >= dev->num_crtcs)) { @@ -778,7 +792,7 @@ static u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe, * modesetting activity. Returns corresponding system timestamp of the time * of the vblank interval that corresponds to the current vblank counter value. */ -u32 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc, +u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc, ktime_t *vblanktime) { return drm_vblank_count_and_time(crtc->dev, drm_crtc_index(crtc), @@ -788,22 +802,30 @@ EXPORT_SYMBOL(drm_crtc_vblank_count_and_time); static void send_vblank_event(struct drm_device *dev, struct drm_pending_vblank_event *e, - unsigned long seq, ktime_t now) + u64 seq, ktime_t now) { - struct timespec64 tv = ktime_to_timespec64(now); - - e->event.sequence = seq; - /* - * e->event is a user space structure, with hardcoded unsigned - * 32-bit seconds/microseconds. This is safe as we always use - * monotonic timestamps since linux-4.15 - */ - e->event.tv_sec = tv.tv_sec; - e->event.tv_usec = tv.tv_nsec / 1000; - - trace_drm_vblank_event_delivered(e->base.file_priv, e->pipe, - e->event.sequence); + struct timespec64 tv; + switch (e->event.base.type) { + case DRM_EVENT_VBLANK: + case DRM_EVENT_FLIP_COMPLETE: + tv = ktime_to_timespec64(now); + e->event.vbl.sequence = seq; + /* + * e->event is a user space structure, with hardcoded unsigned + * 32-bit seconds/microseconds. 
This is safe as we always use + * monotonic timestamps since linux-4.15 + */ + e->event.vbl.tv_sec = tv.tv_sec; + e->event.vbl.tv_usec = tv.tv_nsec / 1000; + break; + case DRM_EVENT_CRTC_SEQUENCE: + if (seq) + e->event.seq.sequence = seq; + e->event.seq.time_ns = ktime_to_ns(now); + break; + } + trace_drm_vblank_event_delivered(e->base.file_priv, e->pipe, seq); drm_send_event_locked(dev, &e->base); } @@ -854,8 +876,7 @@ void drm_crtc_arm_vblank_event(struct drm_crtc *crtc, assert_spin_locked(&dev->event_lock); e->pipe = pipe; - e->event.sequence = drm_crtc_accurate_vblank_count(crtc) + 1; - e->event.crtc_id = crtc->base.id; + e->sequence = drm_crtc_accurate_vblank_count(crtc) + 1; list_add_tail(&e->base.link, &dev->vblank_event_list); } EXPORT_SYMBOL(drm_crtc_arm_vblank_event); @@ -875,7 +896,8 @@ void drm_crtc_send_vblank_event(struct drm_crtc *crtc, struct drm_pending_vblank_event *e) { struct drm_device *dev = crtc->dev; - unsigned int seq, pipe = drm_crtc_index(crtc); + u64 seq; + unsigned int pipe = drm_crtc_index(crtc); ktime_t now; if (dev->num_crtcs > 0) { @@ -886,7 +908,6 @@ void drm_crtc_send_vblank_event(struct drm_crtc *crtc, now = ktime_get(); } e->pipe = pipe; - e->event.crtc_id = crtc->base.id; send_vblank_event(dev, e, seq, now); } EXPORT_SYMBOL(drm_crtc_send_vblank_event); @@ -1088,7 +1109,7 @@ void drm_crtc_vblank_off(struct drm_crtc *crtc) ktime_t now; unsigned long irqflags; - unsigned int seq; + u64 seq; if (WARN_ON(pipe >= dev->num_crtcs)) return; @@ -1123,8 +1144,8 @@ void drm_crtc_vblank_off(struct drm_crtc *crtc) if (e->pipe != pipe) continue; DRM_DEBUG("Sending premature vblank event on disable: " - "wanted %u, current %u\n", - e->event.sequence, seq); + "wanted %llu, current %llu\n", + e->sequence, seq); list_del(&e->base.link); drm_vblank_put(dev, pipe); send_vblank_event(dev, e, seq, now); @@ -1296,12 +1317,13 @@ int drm_legacy_modeset_ctl_ioctl(struct drm_device *dev, void *data, return 0; } -static inline bool vblank_passed(u32 seq, u32 ref) +static inline bool vblank_passed(u64 seq, u64 ref) { return (seq - ref) <= (1 << 23); } static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe, + u64 req_seq, union drm_wait_vblank *vblwait, struct drm_file *file_priv) { @@ -1309,7 +1331,7 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe, struct drm_pending_vblank_event *e; ktime_t now; unsigned long flags; - unsigned int seq; + u64 seq; int ret; e = kzalloc(sizeof(*e), GFP_KERNEL); @@ -1320,8 +1342,14 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe, e->pipe = pipe; e->event.base.type = DRM_EVENT_VBLANK; - e->event.base.length = sizeof(e->event); - e->event.user_data = vblwait->request.signal; + e->event.base.length = sizeof(e->event.vbl); + e->event.vbl.user_data = vblwait->request.signal; + e->event.vbl.crtc_id = 0; + if (drm_core_check_feature(dev, DRIVER_MODESET)) { + struct drm_crtc *crtc = drm_crtc_from_index(dev, pipe); + if (crtc) + e->event.vbl.crtc_id = crtc->base.id; + } spin_lock_irqsave(&dev->event_lock, flags); @@ -1344,21 +1372,20 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe, seq = drm_vblank_count_and_time(dev, pipe, &now); - DRM_DEBUG("event on vblank count %u, current %u, crtc %u\n", - vblwait->request.sequence, seq, pipe); + DRM_DEBUG("event on vblank count %llu, current %llu, crtc %u\n", + req_seq, seq, pipe); - trace_drm_vblank_event_queued(file_priv, pipe, - vblwait->request.sequence); + trace_drm_vblank_event_queued(file_priv, pipe, 
req_seq); - e->event.sequence = vblwait->request.sequence; - if (vblank_passed(seq, vblwait->request.sequence)) { + e->sequence = req_seq; + if (vblank_passed(seq, req_seq)) { drm_vblank_put(dev, pipe); send_vblank_event(dev, e, seq, now); vblwait->reply.sequence = seq; } else { /* drm_handle_vblank_events will call drm_vblank_put */ list_add_tail(&e->base.link, &dev->vblank_event_list); - vblwait->reply.sequence = vblwait->request.sequence; + vblwait->reply.sequence = req_seq; } spin_unlock_irqrestore(&dev->event_lock, flags); @@ -1384,6 +1411,22 @@ static bool drm_wait_vblank_is_query(union drm_wait_vblank *vblwait) _DRM_VBLANK_NEXTONMISS)); } +/* + * Widen a 32-bit param to 64-bits. + * + * \param narrow 32-bit value (missing upper 32 bits) + * \param near 64-bit value that should be 'close' to near + * + * This function returns a 64-bit value using the lower 32-bits from + * 'narrow' and constructing the upper 32-bits so that the result is + * as close as possible to 'near'. + */ + +static u64 widen_32_to_64(u32 narrow, u64 near) +{ + return near + (s32) (narrow - near); +} + static void drm_wait_vblank_reply(struct drm_device *dev, unsigned int pipe, struct drm_wait_vblank_reply *reply) { @@ -1404,10 +1447,13 @@ static void drm_wait_vblank_reply(struct drm_device *dev, unsigned int pipe, int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_crtc *crtc; struct drm_vblank_crtc *vblank; union drm_wait_vblank *vblwait = data; int ret; - unsigned int flags, seq, pipe, high_pipe; + u64 req_seq, seq; + unsigned int pipe_index; + unsigned int flags, pipe, high_pipe; if (!dev->irq_enabled) return -EINVAL; @@ -1428,9 +1474,25 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, flags = vblwait->request.type & _DRM_VBLANK_FLAGS_MASK; high_pipe = (vblwait->request.type & _DRM_VBLANK_HIGH_CRTC_MASK); if (high_pipe) - pipe = high_pipe >> _DRM_VBLANK_HIGH_CRTC_SHIFT; + pipe_index = high_pipe >> _DRM_VBLANK_HIGH_CRTC_SHIFT; else - pipe = flags & _DRM_VBLANK_SECONDARY ? 1 : 0; + pipe_index = flags & _DRM_VBLANK_SECONDARY ? 
1 : 0; + + /* Convert lease-relative crtc index into global crtc index */ + if (drm_core_check_feature(dev, DRIVER_MODESET)) { + pipe = 0; + drm_for_each_crtc(crtc, dev) { + if (drm_lease_held(file_priv, crtc->base.id)) { + if (pipe_index == 0) + break; + pipe_index--; + } + pipe++; + } + } else { + pipe = pipe_index; + } + if (pipe >= dev->num_crtcs) return -EINVAL; @@ -1455,9 +1517,12 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, switch (vblwait->request.type & _DRM_VBLANK_TYPES_MASK) { case _DRM_VBLANK_RELATIVE: - vblwait->request.sequence += seq; + req_seq = seq + vblwait->request.sequence; + vblwait->request.sequence = req_seq; vblwait->request.type &= ~_DRM_VBLANK_RELATIVE; + break; case _DRM_VBLANK_ABSOLUTE: + req_seq = widen_32_to_64(vblwait->request.sequence, seq); break; default: ret = -EINVAL; @@ -1465,22 +1530,25 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, } if ((flags & _DRM_VBLANK_NEXTONMISS) && - vblank_passed(seq, vblwait->request.sequence)) - vblwait->request.sequence = seq + 1; + vblank_passed(seq, req_seq)) { + req_seq = seq + 1; + vblwait->request.type &= ~_DRM_VBLANK_NEXTONMISS; + vblwait->request.sequence = req_seq; + } if (flags & _DRM_VBLANK_EVENT) { /* must hold on to the vblank ref until the event fires * drm_vblank_put will be called asynchronously */ - return drm_queue_vblank_event(dev, pipe, vblwait, file_priv); + return drm_queue_vblank_event(dev, pipe, req_seq, vblwait, file_priv); } - if (vblwait->request.sequence != seq) { - DRM_DEBUG("waiting on vblank count %u, crtc %u\n", - vblwait->request.sequence, pipe); + if (req_seq != seq) { + DRM_DEBUG("waiting on vblank count %llu, crtc %u\n", + req_seq, pipe); DRM_WAIT_ON(ret, vblank->queue, 3 * HZ, vblank_passed(drm_vblank_count(dev, pipe), - vblwait->request.sequence) || + req_seq) || !READ_ONCE(vblank->enabled)); } @@ -1502,7 +1570,7 @@ static void drm_handle_vblank_events(struct drm_device *dev, unsigned int pipe) { struct drm_pending_vblank_event *e, *t; ktime_t now; - unsigned int seq; + u64 seq; assert_spin_locked(&dev->event_lock); @@ -1511,11 +1579,11 @@ static void drm_handle_vblank_events(struct drm_device *dev, unsigned int pipe) list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) { if (e->pipe != pipe) continue; - if (!vblank_passed(seq, e->event.sequence)) + if (!vblank_passed(seq, e->sequence)) continue; - DRM_DEBUG("vblank event on %u, current %u\n", - e->event.sequence, seq); + DRM_DEBUG("vblank event on %llu, current %llu\n", + e->sequence, seq); list_del(&e->base.link); drm_vblank_put(dev, pipe); @@ -1605,3 +1673,166 @@ bool drm_crtc_handle_vblank(struct drm_crtc *crtc) return drm_handle_vblank(crtc->dev, drm_crtc_index(crtc)); } EXPORT_SYMBOL(drm_crtc_handle_vblank); + +/* + * Get crtc VBLANK count. + * + * \param dev DRM device + * \param data user arguement, pointing to a drm_crtc_get_sequence structure. 
+ * \param file_priv drm file private for the user's open file descriptor + */ + +int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_crtc *crtc; + struct drm_vblank_crtc *vblank; + int pipe; + struct drm_crtc_get_sequence *get_seq = data; + ktime_t now; + bool vblank_enabled; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + if (!dev->irq_enabled) + return -EINVAL; + + crtc = drm_crtc_find(dev, file_priv, get_seq->crtc_id); + if (!crtc) + return -ENOENT; + + pipe = drm_crtc_index(crtc); + + vblank = &dev->vblank[pipe]; + vblank_enabled = dev->vblank_disable_immediate && READ_ONCE(vblank->enabled); + + if (!vblank_enabled) { + ret = drm_crtc_vblank_get(crtc); + if (ret) { + DRM_DEBUG("crtc %d failed to acquire vblank counter, %d\n", pipe, ret); + return ret; + } + } + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state) + get_seq->active = crtc->state->enable; + else + get_seq->active = crtc->enabled; + drm_modeset_unlock(&crtc->mutex); + get_seq->sequence = drm_vblank_count_and_time(dev, pipe, &now); + get_seq->sequence_ns = ktime_to_ns(now); + if (!vblank_enabled) + drm_crtc_vblank_put(crtc); + return 0; +} + +/* + * Queue a event for VBLANK sequence + * + * \param dev DRM device + * \param data user arguement, pointing to a drm_crtc_queue_sequence structure. + * \param file_priv drm file private for the user's open file descriptor + */ + +int drm_crtc_queue_sequence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_crtc *crtc; + struct drm_vblank_crtc *vblank; + int pipe; + struct drm_crtc_queue_sequence *queue_seq = data; + ktime_t now; + struct drm_pending_vblank_event *e; + u32 flags; + u64 seq; + u64 req_seq; + int ret; + unsigned long spin_flags; + + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + if (!dev->irq_enabled) + return -EINVAL; + + crtc = drm_crtc_find(dev, file_priv, queue_seq->crtc_id); + if (!crtc) + return -ENOENT; + + flags = queue_seq->flags; + /* Check valid flag bits */ + if (flags & ~(DRM_CRTC_SEQUENCE_RELATIVE| + DRM_CRTC_SEQUENCE_NEXT_ON_MISS)) + return -EINVAL; + + pipe = drm_crtc_index(crtc); + + vblank = &dev->vblank[pipe]; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + ret = drm_crtc_vblank_get(crtc); + if (ret) { + DRM_DEBUG("crtc %d failed to acquire vblank counter, %d\n", pipe, ret); + goto err_free; + } + + seq = drm_vblank_count_and_time(dev, pipe, &now); + req_seq = queue_seq->sequence; + + if (flags & DRM_CRTC_SEQUENCE_RELATIVE) + req_seq += seq; + + if ((flags & DRM_CRTC_SEQUENCE_NEXT_ON_MISS) && vblank_passed(seq, req_seq)) + req_seq = seq + 1; + + e->pipe = pipe; + e->event.base.type = DRM_EVENT_CRTC_SEQUENCE; + e->event.base.length = sizeof(e->event.seq); + e->event.seq.user_data = queue_seq->user_data; + + spin_lock_irqsave(&dev->event_lock, spin_flags); + + /* + * drm_crtc_vblank_off() might have been called after we called + * drm_crtc_vblank_get(). drm_crtc_vblank_off() holds event_lock around the + * vblank disable, so no need for further locking. The reference from + * drm_crtc_vblank_get() protects against vblank disable from another source. 
+ */ + if (!READ_ONCE(vblank->enabled)) { + ret = -EINVAL; + goto err_unlock; + } + + ret = drm_event_reserve_init_locked(dev, file_priv, &e->base, + &e->event.base); + + if (ret) + goto err_unlock; + + e->sequence = req_seq; + + if (vblank_passed(seq, req_seq)) { + drm_crtc_vblank_put(crtc); + send_vblank_event(dev, e, seq, now); + queue_seq->sequence = seq; + } else { + /* drm_handle_vblank_events will call drm_vblank_put */ + list_add_tail(&e->base.link, &dev->vblank_event_list); + queue_seq->sequence = req_seq; + } + + spin_unlock_irqrestore(&dev->event_lock, spin_flags); + return 0; + +err_unlock: + spin_unlock_irqrestore(&dev->event_lock, spin_flags); + drm_crtc_vblank_put(crtc); +err_free: + kfree(e); + return ret; +} diff --git a/drivers/gpu/drm/etnaviv/Makefile b/drivers/gpu/drm/etnaviv/Makefile index 15c3bfa89a79..1281c8d4fae5 100644 --- a/drivers/gpu/drm/etnaviv/Makefile +++ b/drivers/gpu/drm/etnaviv/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 etnaviv-y := \ etnaviv_buffer.o \ etnaviv_cmd_parser.o \ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 3fadb8d45e51..491eddf9b150 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -459,6 +459,9 @@ static int etnaviv_ioctl_pm_query_dom(struct drm_device *dev, void *data, struct drm_etnaviv_pm_domain *args = data; struct etnaviv_gpu *gpu; + /* reject as long as the feature isn't stable */ + return -EINVAL; + if (args->pipe >= ETNA_MAX_PIPES) return -EINVAL; @@ -476,6 +479,9 @@ static int etnaviv_ioctl_pm_query_sig(struct drm_device *dev, void *data, struct drm_etnaviv_pm_signal *args = data; struct etnaviv_gpu *gpu; + /* reject as long as the feature isn't stable */ + return -EINVAL; + if (args->pipe >= ETNA_MAX_PIPES) return -EINVAL; @@ -550,7 +556,7 @@ static struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 2, + .minor = 1, }; /* diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 5884ab623e0a..daee3f1196df 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -760,7 +760,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages( up_read(&mm->mmap_sem); if (ret < 0) { - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); return ERR_PTR(ret); } @@ -833,7 +833,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) } } - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); work = kmalloc(sizeof(*work), GFP_KERNEL); @@ -867,7 +867,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) if (etnaviv_obj->pages) { int npages = etnaviv_obj->base.size >> PAGE_SHIFT; - release_pages(etnaviv_obj->pages, npages, 0); + release_pages(etnaviv_obj->pages, npages); kvfree(etnaviv_obj->pages); } put_task_struct(etnaviv_obj->userptr.task); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 8197e1d6ed11..e19cbe05da2a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -968,9 +968,9 @@ static void hangcheck_timer_reset(struct etnaviv_gpu *gpu) round_jiffies_up(jiffies + DRM_ETNAVIV_HANGCHECK_JIFFIES)); } -static void hangcheck_handler(unsigned long data) +static void hangcheck_handler(struct timer_list *t) { - struct etnaviv_gpu *gpu = (struct etnaviv_gpu *)data; + struct etnaviv_gpu *gpu = 
from_timer(gpu, t, hangcheck_timer); u32 fence = gpu->completed_fence; bool progress = false; @@ -1765,8 +1765,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master, INIT_WORK(&gpu->recover_work, recover_worker); init_waitqueue_head(&gpu->fence_event); - setup_deferrable_timer(&gpu->hangcheck_timer, hangcheck_handler, - (unsigned long)gpu); + timer_setup(&gpu->hangcheck_timer, hangcheck_handler, TIMER_DEFERRABLE); priv->gpu[priv->num_gpus++] = gpu; diff --git a/drivers/gpu/drm/etnaviv/state.xml.h b/drivers/gpu/drm/etnaviv/state.xml.h index 368218304566..c27c1484cfa9 100644 --- a/drivers/gpu/drm/etnaviv/state.xml.h +++ b/drivers/gpu/drm/etnaviv/state.xml.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef STATE_XML #define STATE_XML diff --git a/drivers/gpu/drm/etnaviv/state_3d.xml.h b/drivers/gpu/drm/etnaviv/state_3d.xml.h index d7146fd13943..73a97d35c51b 100644 --- a/drivers/gpu/drm/etnaviv/state_3d.xml.h +++ b/drivers/gpu/drm/etnaviv/state_3d.xml.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef STATE_3D_XML #define STATE_3D_XML diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h index 43c73e2ed34f..60808daf7e8d 100644 --- a/drivers/gpu/drm/etnaviv/state_hi.xml.h +++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef STATE_HI_XML #define STATE_HI_XML diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 305dc3d4ff77..5a7c9d8abd6b 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -3,6 +3,7 @@ config DRM_EXYNOS depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM) select DRM_KMS_HELPER select VIDEOMODE_HELPERS + select SND_SOC_HDMI_CODEC if SND_SOC help Choose this option if you have a Samsung SoC EXYNOS chipset. If M is selected the module will be called exynosdrm. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index f663490e949d..bdf4212dde7b 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
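The etnaviv_gpu.c hangcheck change above follows the tree-wide timer API
conversion: the callback now receives a struct timer_list pointer and
recovers its containing structure with from_timer(), while
setup_deferrable_timer() is replaced by timer_setup(..., TIMER_DEFERRABLE).
A minimal sketch of the pattern, not taken from the patch and using a
hypothetical foo_dev structure:

	struct foo_dev {
		struct timer_list hangcheck_timer;
	};

	static void foo_hangcheck(struct timer_list *t)
	{
		/* recover the enclosing structure from the timer pointer */
		struct foo_dev *foo = from_timer(foo, t, hangcheck_timer);

		/* check for progress, then re-arm the deferrable timer */
		mod_timer(&foo->hangcheck_timer,
			  round_jiffies_up(jiffies + HZ));
	}

	/* at init time */
	timer_setup(&foo->hangcheck_timer, foo_hangcheck, TIMER_DEFERRABLE);
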
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index 6ce0821590df..dc01342e759a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -95,8 +95,23 @@ static enum drm_mode_status exynos_crtc_mode_valid(struct drm_crtc *crtc, return MODE_OK; } +static bool exynos_crtc_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); + + if (exynos_crtc->ops->mode_fixup) + return exynos_crtc->ops->mode_fixup(exynos_crtc, mode, + adjusted_mode); + + return true; +} + + static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { .mode_valid = exynos_crtc_mode_valid, + .mode_fixup = exynos_crtc_mode_fixup, .atomic_check = exynos_crtc_atomic_check, .atomic_begin = exynos_crtc_atomic_begin, .atomic_flush = exynos_crtc_atomic_flush, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index e651a58c18cf..82b72425a42f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -168,11 +168,13 @@ static struct drm_driver exynos_drm_driver = { static int exynos_drm_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct exynos_drm_private *private = drm_dev->dev_private; + struct exynos_drm_private *private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; + private = drm_dev->dev_private; + drm_kms_helper_poll_disable(drm_dev); exynos_drm_fbdev_suspend(drm_dev); private->suspend_state = drm_atomic_helper_suspend(drm_dev); @@ -188,11 +190,12 @@ static int exynos_drm_suspend(struct device *dev) static int exynos_drm_resume(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct exynos_drm_private *private = drm_dev->dev_private; + struct exynos_drm_private *private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; + private = drm_dev->dev_private; drm_atomic_helper_resume(drm_dev, private->suspend_state); exynos_drm_fbdev_resume(drm_dev); drm_kms_helper_poll_enable(drm_dev); @@ -427,6 +430,7 @@ static void exynos_drm_unbind(struct device *dev) kfree(drm->dev_private); drm->dev_private = NULL; + dev_set_drvdata(dev, NULL); drm_dev_unref(drm); } diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index f8bae4cb4823..c6847fa708fa 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -136,6 +136,9 @@ struct exynos_drm_crtc_ops { u32 (*get_vblank_counter)(struct exynos_drm_crtc *crtc); enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc, const struct drm_display_mode *mode); + bool (*mode_fixup)(struct exynos_drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); int (*atomic_check)(struct exynos_drm_crtc *crtc, struct drm_crtc_state *state); void (*atomic_begin)(struct exynos_drm_crtc *crtc); diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 0109ff40b1db..82d1b7e2febe 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -40,7 +40,7 @@ #include <linux/component.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> - +#include <sound/hdmi-codec.h> #include <drm/exynos_drm.h> #include <media/cec-notifier.h> @@ -111,15 +111,20 @@ struct hdmi_driver_data { struct string_array_spec clk_muxes; }; +struct 
hdmi_audio { + struct platform_device *pdev; + struct hdmi_audio_infoframe infoframe; + struct hdmi_codec_params params; + bool mute; +}; + struct hdmi_context { struct drm_encoder encoder; struct device *dev; struct drm_device *drm_dev; struct drm_connector connector; - bool powered; bool dvi_mode; struct delayed_work hotplug_work; - struct drm_display_mode current_mode; struct cec_notifier *notifier; const struct hdmi_driver_data *drv_data; @@ -137,6 +142,11 @@ struct hdmi_context { struct regulator *reg_hdmi_en; struct exynos_drm_clk phy_clk; struct drm_bridge *bridge; + + /* mutex protecting subsequent fields below */ + struct mutex mutex; + struct hdmi_audio audio; + bool powered; }; static inline struct hdmi_context *encoder_to_hdmi(struct drm_encoder *e) @@ -298,6 +308,15 @@ static const struct hdmiphy_config hdmiphy_v14_configs[] = { }, }, { + .pixel_clock = 85500000, + .conf = { + 0x01, 0xd1, 0x24, 0x11, 0x40, 0x40, 0xd0, 0x08, + 0x84, 0xa0, 0xd6, 0xd8, 0x45, 0xa0, 0xac, 0x80, + 0x08, 0x80, 0x11, 0x04, 0x02, 0x22, 0x44, 0x86, + 0x54, 0x90, 0x24, 0x01, 0x00, 0x00, 0x01, 0x80, + }, + }, + { .pixel_clock = 106500000, .conf = { 0x01, 0xd1, 0x2c, 0x12, 0x40, 0x0c, 0x09, 0x08, @@ -768,8 +787,25 @@ static int hdmi_clk_set_parents(struct hdmi_context *hdata, bool to_phy) return ret; } +static int hdmi_audio_infoframe_apply(struct hdmi_context *hdata) +{ + struct hdmi_audio_infoframe *infoframe = &hdata->audio.infoframe; + u8 buf[HDMI_INFOFRAME_SIZE(AUDIO)]; + int len; + + len = hdmi_audio_infoframe_pack(infoframe, buf, sizeof(buf)); + if (len < 0) + return len; + + hdmi_reg_writeb(hdata, HDMI_AUI_CON, HDMI_AUI_CON_EVERY_VSYNC); + hdmi_reg_write_buf(hdata, HDMI_AUI_HEADER0, buf, len); + + return 0; +} + static void hdmi_reg_infoframes(struct hdmi_context *hdata) { + struct drm_display_mode *m = &hdata->encoder.crtc->state->mode; union hdmi_infoframe frm; u8 buf[25]; int ret; @@ -783,8 +819,7 @@ static void hdmi_reg_infoframes(struct hdmi_context *hdata) return; } - ret = drm_hdmi_avi_infoframe_from_display_mode(&frm.avi, - &hdata->current_mode, false); + ret = drm_hdmi_avi_infoframe_from_display_mode(&frm.avi, m, false); if (!ret) ret = hdmi_avi_infoframe_pack(&frm.avi, buf, sizeof(buf)); if (ret > 0) { @@ -794,8 +829,7 @@ static void hdmi_reg_infoframes(struct hdmi_context *hdata) DRM_INFO("%s: invalid AVI infoframe (%d)\n", __func__, ret); } - ret = drm_hdmi_vendor_infoframe_from_display_mode(&frm.vendor.hdmi, - &hdata->current_mode); + ret = drm_hdmi_vendor_infoframe_from_display_mode(&frm.vendor.hdmi, m); if (!ret) ret = hdmi_vendor_infoframe_pack(&frm.vendor.hdmi, buf, sizeof(buf)); @@ -805,15 +839,7 @@ static void hdmi_reg_infoframes(struct hdmi_context *hdata) hdmi_reg_write_buf(hdata, HDMI_VSI_DATA(0), buf + 3, ret - 3); } - ret = hdmi_audio_infoframe_init(&frm.audio); - if (!ret) { - frm.audio.channels = 2; - ret = hdmi_audio_infoframe_pack(&frm.audio, buf, sizeof(buf)); - } - if (ret > 0) { - hdmi_reg_writeb(hdata, HDMI_AUI_CON, HDMI_AUI_CON_EVERY_VSYNC); - hdmi_reg_write_buf(hdata, HDMI_AUI_HEADER0, buf, ret); - } + hdmi_audio_infoframe_apply(hdata); } static enum drm_connector_status hdmi_detect(struct drm_connector *connector, @@ -1003,23 +1029,18 @@ static void hdmi_reg_acr(struct hdmi_context *hdata, u32 freq) hdmi_reg_writeb(hdata, HDMI_ACR_CON, 4); } -static void hdmi_audio_init(struct hdmi_context *hdata) +static void hdmi_audio_config(struct hdmi_context *hdata) { - u32 sample_rate, bits_per_sample; - u32 data_num, bit_ch, sample_frq; - u32 val; - - sample_rate = 44100; 
- bits_per_sample = 16; + u32 bit_ch = 1; + u32 data_num, val; + int i; - switch (bits_per_sample) { + switch (hdata->audio.params.sample_width) { case 20: data_num = 2; - bit_ch = 1; break; case 24: data_num = 3; - bit_ch = 1; break; default: data_num = 1; @@ -1027,7 +1048,7 @@ static void hdmi_audio_init(struct hdmi_context *hdata) break; } - hdmi_reg_acr(hdata, sample_rate); + hdmi_reg_acr(hdata, hdata->audio.params.sample_rate); hdmi_reg_writeb(hdata, HDMI_I2S_MUX_CON, HDMI_I2S_IN_DISABLE | HDMI_I2S_AUD_I2S | HDMI_I2S_CUV_I2S_ENABLE @@ -1037,12 +1058,6 @@ static void hdmi_audio_init(struct hdmi_context *hdata) | HDMI_I2S_CH1_EN | HDMI_I2S_CH2_EN); hdmi_reg_writeb(hdata, HDMI_I2S_MUX_CUV, HDMI_I2S_CUV_RL_EN); - - sample_frq = (sample_rate == 44100) ? 0 : - (sample_rate == 48000) ? 2 : - (sample_rate == 32000) ? 3 : - (sample_rate == 96000) ? 0xa : 0x0; - hdmi_reg_writeb(hdata, HDMI_I2S_CLK_CON, HDMI_I2S_CLK_DIS); hdmi_reg_writeb(hdata, HDMI_I2S_CLK_CON, HDMI_I2S_CLK_EN); @@ -1066,39 +1081,33 @@ static void hdmi_audio_init(struct hdmi_context *hdata) | HDMI_I2S_SET_SDATA_BIT(data_num) | HDMI_I2S_BASIC_FORMAT); - /* Configure register related to CUV information */ - hdmi_reg_writeb(hdata, HDMI_I2S_CH_ST_0, HDMI_I2S_CH_STATUS_MODE_0 - | HDMI_I2S_2AUD_CH_WITHOUT_PREEMPH - | HDMI_I2S_COPYRIGHT - | HDMI_I2S_LINEAR_PCM - | HDMI_I2S_CONSUMER_FORMAT); - hdmi_reg_writeb(hdata, HDMI_I2S_CH_ST_1, HDMI_I2S_CD_PLAYER); - hdmi_reg_writeb(hdata, HDMI_I2S_CH_ST_2, HDMI_I2S_SET_SOURCE_NUM(0)); - hdmi_reg_writeb(hdata, HDMI_I2S_CH_ST_3, HDMI_I2S_CLK_ACCUR_LEVEL_2 - | HDMI_I2S_SET_SMP_FREQ(sample_frq)); - hdmi_reg_writeb(hdata, HDMI_I2S_CH_ST_4, - HDMI_I2S_ORG_SMP_FREQ_44_1 - | HDMI_I2S_WORD_LEN_MAX24_24BITS - | HDMI_I2S_WORD_LEN_MAX_24BITS); + /* Configuration of the audio channel status registers */ + for (i = 0; i < HDMI_I2S_CH_ST_MAXNUM; i++) + hdmi_reg_writeb(hdata, HDMI_I2S_CH_ST(i), + hdata->audio.params.iec.status[i]); hdmi_reg_writeb(hdata, HDMI_I2S_CH_ST_CON, HDMI_I2S_CH_STATUS_RELOAD); } -static void hdmi_audio_control(struct hdmi_context *hdata, bool onoff) +static void hdmi_audio_control(struct hdmi_context *hdata) { + bool enable = !hdata->audio.mute; + if (hdata->dvi_mode) return; - hdmi_reg_writeb(hdata, HDMI_AUI_CON, onoff ? 2 : 0); - hdmi_reg_writemask(hdata, HDMI_CON_0, onoff ? + hdmi_reg_writeb(hdata, HDMI_AUI_CON, enable ? + HDMI_AVI_CON_EVERY_VSYNC : HDMI_AUI_CON_NO_TRAN); + hdmi_reg_writemask(hdata, HDMI_CON_0, enable ? HDMI_ASP_EN : HDMI_ASP_DIS, HDMI_ASP_MASK); } static void hdmi_start(struct hdmi_context *hdata, bool start) { + struct drm_display_mode *m = &hdata->encoder.crtc->state->mode; u32 val = start ? 
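/*
 * Editorial sketch, not part of the patch: the hdmi_codec_params members that
 * the reworked hdmi_audio_config() consumes (layout assumed from
 * include/sound/hdmi-codec.h).  sample_rate drives the ACR programming,
 * sample_width selects the I2S data size, and the IEC 60958 channel-status
 * bytes are written verbatim into the HDMI_I2S_CH_ST(n) registers.
 */
#include <linux/printk.h>
#include <sound/hdmi-codec.h>

static void example_dump_codec_params(const struct hdmi_codec_params *params)
{
	int i;

	pr_debug("rate=%d width=%d channels=%d\n",
		 params->sample_rate, params->sample_width, params->channels);

	for (i = 0; i < 5; i++)	/* 5 == HDMI_I2S_CH_ST_MAXNUM in this driver */
		pr_debug("iec status[%d]=%02x\n", i, params->iec.status[i]);
}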
HDMI_TG_EN : 0; - if (hdata->current_mode.flags & DRM_MODE_FLAG_INTERLACE) + if (m->flags & DRM_MODE_FLAG_INTERLACE) val |= HDMI_FIELD_EN; hdmi_reg_writemask(hdata, HDMI_CON_0, val, HDMI_EN); @@ -1168,7 +1177,7 @@ static void hdmiphy_wait_for_pll(struct hdmi_context *hdata) static void hdmi_v13_mode_apply(struct hdmi_context *hdata) { - struct drm_display_mode *m = &hdata->current_mode; + struct drm_display_mode *m = &hdata->encoder.crtc->state->mode; unsigned int val; hdmi_reg_writev(hdata, HDMI_H_BLANK_0, 2, m->htotal - m->hdisplay); @@ -1247,7 +1256,19 @@ static void hdmi_v13_mode_apply(struct hdmi_context *hdata) static void hdmi_v14_mode_apply(struct hdmi_context *hdata) { - struct drm_display_mode *m = &hdata->current_mode; + struct drm_display_mode *m = &hdata->encoder.crtc->state->mode; + struct drm_display_mode *am = + &hdata->encoder.crtc->state->adjusted_mode; + int hquirk = 0; + + /* + * In case video mode coming from CRTC differs from requested one HDMI + * sometimes is able to almost properly perform conversion - only + * first line is distorted. + */ + if ((m->vdisplay != am->vdisplay) && + (m->hdisplay == 1280 || m->hdisplay == 1024 || m->hdisplay == 1366)) + hquirk = 258; hdmi_reg_writev(hdata, HDMI_H_BLANK_0, 2, m->htotal - m->hdisplay); hdmi_reg_writev(hdata, HDMI_V_LINE_0, 2, m->vtotal); @@ -1341,8 +1362,9 @@ static void hdmi_v14_mode_apply(struct hdmi_context *hdata) hdmi_reg_writev(hdata, HDMI_V_SYNC_LINE_AFT_PXL_6_0, 2, 0xffff); hdmi_reg_writev(hdata, HDMI_TG_H_FSZ_L, 2, m->htotal); - hdmi_reg_writev(hdata, HDMI_TG_HACT_ST_L, 2, m->htotal - m->hdisplay); - hdmi_reg_writev(hdata, HDMI_TG_HACT_SZ_L, 2, m->hdisplay); + hdmi_reg_writev(hdata, HDMI_TG_HACT_ST_L, 2, + m->htotal - m->hdisplay - hquirk); + hdmi_reg_writev(hdata, HDMI_TG_HACT_SZ_L, 2, m->hdisplay + hquirk); hdmi_reg_writev(hdata, HDMI_TG_V_FSZ_L, 2, m->vtotal); if (hdata->drv_data == &exynos5433_hdmi_driver_data) hdmi_reg_writeb(hdata, HDMI_TG_DECON_EN, 1); @@ -1380,10 +1402,11 @@ static void hdmiphy_enable_mode_set(struct hdmi_context *hdata, bool enable) static void hdmiphy_conf_apply(struct hdmi_context *hdata) { + struct drm_display_mode *m = &hdata->encoder.crtc->state->mode; int ret; const u8 *phy_conf; - ret = hdmi_find_phy_conf(hdata, hdata->current_mode.clock * 1000); + ret = hdmi_find_phy_conf(hdata, m->clock * 1000); if (ret < 0) { DRM_ERROR("failed to find hdmiphy conf\n"); return; @@ -1406,28 +1429,14 @@ static void hdmiphy_conf_apply(struct hdmi_context *hdata) hdmiphy_wait_for_pll(hdata); } +/* Should be called with hdata->mutex mutex held */ static void hdmi_conf_apply(struct hdmi_context *hdata) { hdmi_start(hdata, false); hdmi_conf_init(hdata); - hdmi_audio_init(hdata); + hdmi_audio_config(hdata); hdmi_mode_apply(hdata); - hdmi_audio_control(hdata, true); -} - -static void hdmi_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct hdmi_context *hdata = encoder_to_hdmi(encoder); - struct drm_display_mode *m = adjusted_mode; - - DRM_DEBUG_KMS("xres=%d, yres=%d, refresh=%d, intl=%s\n", - m->hdisplay, m->vdisplay, - m->vrefresh, (m->flags & DRM_MODE_FLAG_INTERLACE) ? - "INTERLACED" : "PROGRESSIVE"); - - drm_mode_copy(&hdata->current_mode, m); + hdmi_audio_control(hdata); } static void hdmi_set_refclk(struct hdmi_context *hdata, bool on) @@ -1439,6 +1448,7 @@ static void hdmi_set_refclk(struct hdmi_context *hdata, bool on) SYSREG_HDMI_REFCLK_INT_CLK, on ? ~0 : 0); } +/* Should be called with hdata->mutex mutex held. 
*/ static void hdmiphy_enable(struct hdmi_context *hdata) { if (hdata->powered) @@ -1461,6 +1471,7 @@ static void hdmiphy_enable(struct hdmi_context *hdata) hdata->powered = true; } +/* Should be called with hdata->mutex mutex held. */ static void hdmiphy_disable(struct hdmi_context *hdata) { if (!hdata->powered) @@ -1486,33 +1497,42 @@ static void hdmi_enable(struct drm_encoder *encoder) { struct hdmi_context *hdata = encoder_to_hdmi(encoder); + mutex_lock(&hdata->mutex); + hdmiphy_enable(hdata); hdmi_conf_apply(hdata); + + mutex_unlock(&hdata->mutex); } static void hdmi_disable(struct drm_encoder *encoder) { struct hdmi_context *hdata = encoder_to_hdmi(encoder); - if (!hdata->powered) + mutex_lock(&hdata->mutex); + + if (hdata->powered) { + /* + * The SFRs of VP and Mixer are updated by Vertical Sync of + * Timing generator which is a part of HDMI so the sequence + * to disable TV Subsystem should be as following, + * VP -> Mixer -> HDMI + * + * To achieve such sequence HDMI is disabled together with + * HDMI PHY, via pipe clock callback. + */ + mutex_unlock(&hdata->mutex); + cancel_delayed_work(&hdata->hotplug_work); + cec_notifier_set_phys_addr(hdata->notifier, + CEC_PHYS_ADDR_INVALID); return; + } - /* - * The SFRs of VP and Mixer are updated by Vertical Sync of - * Timing generator which is a part of HDMI so the sequence - * to disable TV Subsystem should be as following, - * VP -> Mixer -> HDMI - * - * To achieve such sequence HDMI is disabled together with HDMI PHY, via - * pipe clock callback. - */ - cancel_delayed_work(&hdata->hotplug_work); - cec_notifier_set_phys_addr(hdata->notifier, CEC_PHYS_ADDR_INVALID); + mutex_unlock(&hdata->mutex); } static const struct drm_encoder_helper_funcs exynos_hdmi_encoder_helper_funcs = { .mode_fixup = hdmi_mode_fixup, - .mode_set = hdmi_mode_set, .enable = hdmi_enable, .disable = hdmi_disable, }; @@ -1521,6 +1541,99 @@ static const struct drm_encoder_funcs exynos_hdmi_encoder_funcs = { .destroy = drm_encoder_cleanup, }; +static void hdmi_audio_shutdown(struct device *dev, void *data) +{ + struct hdmi_context *hdata = dev_get_drvdata(dev); + + mutex_lock(&hdata->mutex); + + hdata->audio.mute = true; + + if (hdata->powered) + hdmi_audio_control(hdata); + + mutex_unlock(&hdata->mutex); +} + +static int hdmi_audio_hw_params(struct device *dev, void *data, + struct hdmi_codec_daifmt *daifmt, + struct hdmi_codec_params *params) +{ + struct hdmi_context *hdata = dev_get_drvdata(dev); + + if (daifmt->fmt != HDMI_I2S || daifmt->bit_clk_inv || + daifmt->frame_clk_inv || daifmt->bit_clk_master || + daifmt->frame_clk_master) { + dev_err(dev, "%s: Bad flags %d %d %d %d\n", __func__, + daifmt->bit_clk_inv, daifmt->frame_clk_inv, + daifmt->bit_clk_master, + daifmt->frame_clk_master); + return -EINVAL; + } + + mutex_lock(&hdata->mutex); + + hdata->audio.params = *params; + + if (hdata->powered) { + hdmi_audio_config(hdata); + hdmi_audio_infoframe_apply(hdata); + } + + mutex_unlock(&hdata->mutex); + + return 0; +} + +static int hdmi_audio_digital_mute(struct device *dev, void *data, bool mute) +{ + struct hdmi_context *hdata = dev_get_drvdata(dev); + + mutex_lock(&hdata->mutex); + + hdata->audio.mute = mute; + + if (hdata->powered) + hdmi_audio_control(hdata); + + mutex_unlock(&hdata->mutex); + + return 0; +} + +static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, + size_t len) +{ + struct hdmi_context *hdata = dev_get_drvdata(dev); + struct drm_connector *connector = &hdata->connector; + + memcpy(buf, connector->eld, 
min(sizeof(connector->eld), len)); + + return 0; +} + +static const struct hdmi_codec_ops audio_codec_ops = { + .hw_params = hdmi_audio_hw_params, + .audio_shutdown = hdmi_audio_shutdown, + .digital_mute = hdmi_audio_digital_mute, + .get_eld = hdmi_audio_get_eld, +}; + +static int hdmi_register_audio_device(struct hdmi_context *hdata) +{ + struct hdmi_codec_pdata codec_data = { + .ops = &audio_codec_ops, + .max_i2s_channels = 6, + .i2s = 1, + }; + + hdata->audio.pdev = platform_device_register_data( + hdata->dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO, + &codec_data, sizeof(codec_data)); + + return PTR_ERR_OR_ZERO(hdata->audio.pdev); +} + static void hdmi_hotplug_work_func(struct work_struct *work) { struct hdmi_context *hdata; @@ -1596,11 +1709,14 @@ static void hdmiphy_clk_enable(struct exynos_drm_clk *clk, bool enable) { struct hdmi_context *hdata = container_of(clk, struct hdmi_context, phy_clk); + mutex_lock(&hdata->mutex); if (enable) hdmiphy_enable(hdata); else hdmiphy_disable(hdata); + + mutex_unlock(&hdata->mutex); } static int hdmi_bridge_init(struct hdmi_context *hdata) @@ -1811,6 +1927,7 @@ out: static int hdmi_probe(struct platform_device *pdev) { + struct hdmi_audio_infoframe *audio_infoframe; struct device *dev = &pdev->dev; struct hdmi_context *hdata; struct resource *res; @@ -1826,6 +1943,8 @@ static int hdmi_probe(struct platform_device *pdev) hdata->dev = dev; + mutex_init(&hdata->mutex); + ret = hdmi_resources_init(hdata); if (ret) { if (ret != -EPROBE_DEFER) @@ -1885,12 +2004,26 @@ static int hdmi_probe(struct platform_device *pdev) pm_runtime_enable(dev); - ret = component_add(&pdev->dev, &hdmi_component_ops); + audio_infoframe = &hdata->audio.infoframe; + hdmi_audio_infoframe_init(audio_infoframe); + audio_infoframe->coding_type = HDMI_AUDIO_CODING_TYPE_STREAM; + audio_infoframe->sample_size = HDMI_AUDIO_SAMPLE_SIZE_STREAM; + audio_infoframe->sample_frequency = HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM; + audio_infoframe->channels = 2; + + ret = hdmi_register_audio_device(hdata); if (ret) goto err_notifier_put; + ret = component_add(&pdev->dev, &hdmi_component_ops); + if (ret) + goto err_unregister_audio; + return ret; +err_unregister_audio: + platform_device_unregister(hdata->audio.pdev); + err_notifier_put: cec_notifier_put(hdata->notifier); pm_runtime_disable(dev); @@ -1914,6 +2047,7 @@ static int hdmi_remove(struct platform_device *pdev) cec_notifier_set_phys_addr(hdata->notifier, CEC_PHYS_ADDR_INVALID); component_del(&pdev->dev, &hdmi_component_ops); + platform_device_unregister(hdata->audio.pdev); cec_notifier_put(hdata->notifier); pm_runtime_disable(&pdev->dev); @@ -1929,6 +2063,8 @@ static int hdmi_remove(struct platform_device *pdev) put_device(&hdata->ddc_adpt->dev); + mutex_destroy(&hdata->mutex); + return 0; } diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 002755415e00..dc5d79465f9b 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -67,19 +67,6 @@ #define MXR_FORMAT_ARGB4444 6 #define MXR_FORMAT_ARGB8888 7 -struct mixer_resources { - int irq; - void __iomem *mixer_regs; - void __iomem *vp_regs; - spinlock_t reg_slock; - struct clk *mixer; - struct clk *vp; - struct clk *hdmi; - struct clk *sclk_mixer; - struct clk *sclk_hdmi; - struct clk *mout_mixer; -}; - enum mixer_version_id { MXR_VER_0_0_0_16, MXR_VER_16_0_33_0, @@ -117,8 +104,18 @@ struct mixer_context { struct exynos_drm_plane planes[MIXER_WIN_NR]; unsigned long flags; - struct mixer_resources mixer_res; + 
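/*
 * Editorial sketch, not part of the patch: the pattern behind
 * hdmi_register_audio_device() - spawning the ASoC "hdmi-audio-codec" child
 * with platform_device_register_data() so the generic hdmi-codec driver binds
 * to it and calls back through the ops table above.  The parent/ops arguments
 * here are placeholders.
 */
#include <linux/platform_device.h>
#include <sound/hdmi-codec.h>

static struct platform_device *
example_spawn_hdmi_codec(struct device *parent, const struct hdmi_codec_ops *ops)
{
	struct hdmi_codec_pdata pdata = {
		.ops = ops,
		.i2s = 1,		/* audio is fed to the codec over I2S */
		.max_i2s_channels = 6,
	};

	return platform_device_register_data(parent, HDMI_CODEC_DRV_NAME,
					     PLATFORM_DEVID_AUTO,
					     &pdata, sizeof(pdata));
}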
int irq; + void __iomem *mixer_regs; + void __iomem *vp_regs; + spinlock_t reg_slock; + struct clk *mixer; + struct clk *vp; + struct clk *hdmi; + struct clk *sclk_mixer; + struct clk *sclk_hdmi; + struct clk *mout_mixer; enum mixer_version_id mxr_ver; + int scan_value; }; struct mixer_drv_data { @@ -194,44 +191,44 @@ static inline bool is_alpha_format(unsigned int pixel_format) } } -static inline u32 vp_reg_read(struct mixer_resources *res, u32 reg_id) +static inline u32 vp_reg_read(struct mixer_context *ctx, u32 reg_id) { - return readl(res->vp_regs + reg_id); + return readl(ctx->vp_regs + reg_id); } -static inline void vp_reg_write(struct mixer_resources *res, u32 reg_id, +static inline void vp_reg_write(struct mixer_context *ctx, u32 reg_id, u32 val) { - writel(val, res->vp_regs + reg_id); + writel(val, ctx->vp_regs + reg_id); } -static inline void vp_reg_writemask(struct mixer_resources *res, u32 reg_id, +static inline void vp_reg_writemask(struct mixer_context *ctx, u32 reg_id, u32 val, u32 mask) { - u32 old = vp_reg_read(res, reg_id); + u32 old = vp_reg_read(ctx, reg_id); val = (val & mask) | (old & ~mask); - writel(val, res->vp_regs + reg_id); + writel(val, ctx->vp_regs + reg_id); } -static inline u32 mixer_reg_read(struct mixer_resources *res, u32 reg_id) +static inline u32 mixer_reg_read(struct mixer_context *ctx, u32 reg_id) { - return readl(res->mixer_regs + reg_id); + return readl(ctx->mixer_regs + reg_id); } -static inline void mixer_reg_write(struct mixer_resources *res, u32 reg_id, +static inline void mixer_reg_write(struct mixer_context *ctx, u32 reg_id, u32 val) { - writel(val, res->mixer_regs + reg_id); + writel(val, ctx->mixer_regs + reg_id); } -static inline void mixer_reg_writemask(struct mixer_resources *res, +static inline void mixer_reg_writemask(struct mixer_context *ctx, u32 reg_id, u32 val, u32 mask) { - u32 old = mixer_reg_read(res, reg_id); + u32 old = mixer_reg_read(ctx, reg_id); val = (val & mask) | (old & ~mask); - writel(val, res->mixer_regs + reg_id); + writel(val, ctx->mixer_regs + reg_id); } static void mixer_regs_dump(struct mixer_context *ctx) @@ -239,7 +236,7 @@ static void mixer_regs_dump(struct mixer_context *ctx) #define DUMPREG(reg_id) \ do { \ DRM_DEBUG_KMS(#reg_id " = %08x\n", \ - (u32)readl(ctx->mixer_res.mixer_regs + reg_id)); \ + (u32)readl(ctx->mixer_regs + reg_id)); \ } while (0) DUMPREG(MXR_STATUS); @@ -271,7 +268,7 @@ static void vp_regs_dump(struct mixer_context *ctx) #define DUMPREG(reg_id) \ do { \ DRM_DEBUG_KMS(#reg_id " = %08x\n", \ - (u32) readl(ctx->mixer_res.vp_regs + reg_id)); \ + (u32) readl(ctx->vp_regs + reg_id)); \ } while (0) DUMPREG(VP_ENABLE); @@ -301,7 +298,7 @@ do { \ #undef DUMPREG } -static inline void vp_filter_set(struct mixer_resources *res, +static inline void vp_filter_set(struct mixer_context *ctx, int reg_id, const u8 *data, unsigned int size) { /* assure 4-byte align */ @@ -309,24 +306,23 @@ static inline void vp_filter_set(struct mixer_resources *res, for (; size; size -= 4, reg_id += 4, data += 4) { u32 val = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; - vp_reg_write(res, reg_id, val); + vp_reg_write(ctx, reg_id, val); } } -static void vp_default_filter(struct mixer_resources *res) +static void vp_default_filter(struct mixer_context *ctx) { - vp_filter_set(res, VP_POLY8_Y0_LL, + vp_filter_set(ctx, VP_POLY8_Y0_LL, filter_y_horiz_tap8, sizeof(filter_y_horiz_tap8)); - vp_filter_set(res, VP_POLY4_Y0_LL, + vp_filter_set(ctx, VP_POLY4_Y0_LL, filter_y_vert_tap4, sizeof(filter_y_vert_tap4)); - 
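/*
 * Editorial sketch, not part of the patch: the read-modify-write idiom that
 * mixer_reg_writemask()/vp_reg_writemask() implement now that the register
 * bases live directly in struct mixer_context.
 */
#include <linux/io.h>

static inline void example_writemask(void __iomem *regs, u32 reg, u32 val, u32 mask)
{
	u32 old = readl(regs + reg);

	/* only the bits selected by @mask are taken from @val */
	writel((val & mask) | (old & ~mask), regs + reg);
}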
vp_filter_set(res, VP_POLY4_C0_LL, + vp_filter_set(ctx, VP_POLY4_C0_LL, filter_cr_horiz_tap4, sizeof(filter_cr_horiz_tap4)); } static void mixer_cfg_gfx_blend(struct mixer_context *ctx, unsigned int win, bool alpha) { - struct mixer_resources *res = &ctx->mixer_res; u32 val; val = MXR_GRP_CFG_COLOR_KEY_DISABLE; /* no blank key */ @@ -335,13 +331,12 @@ static void mixer_cfg_gfx_blend(struct mixer_context *ctx, unsigned int win, val |= MXR_GRP_CFG_BLEND_PRE_MUL; val |= MXR_GRP_CFG_PIXEL_BLEND_EN; } - mixer_reg_writemask(res, MXR_GRAPHIC_CFG(win), + mixer_reg_writemask(ctx, MXR_GRAPHIC_CFG(win), val, MXR_GRP_CFG_MISC_MASK); } static void mixer_cfg_vp_blend(struct mixer_context *ctx) { - struct mixer_resources *res = &ctx->mixer_res; u32 val; /* @@ -351,51 +346,39 @@ static void mixer_cfg_vp_blend(struct mixer_context *ctx) * support blending of the video layer through this. */ val = 0; - mixer_reg_write(res, MXR_VIDEO_CFG, val); + mixer_reg_write(ctx, MXR_VIDEO_CFG, val); } static void mixer_vsync_set_update(struct mixer_context *ctx, bool enable) { - struct mixer_resources *res = &ctx->mixer_res; - /* block update on vsync */ - mixer_reg_writemask(res, MXR_STATUS, enable ? + mixer_reg_writemask(ctx, MXR_STATUS, enable ? MXR_STATUS_SYNC_ENABLE : 0, MXR_STATUS_SYNC_ENABLE); if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) - vp_reg_write(res, VP_SHADOW_UPDATE, enable ? + vp_reg_write(ctx, VP_SHADOW_UPDATE, enable ? VP_SHADOW_UPDATE_ENABLE : 0); } -static void mixer_cfg_scan(struct mixer_context *ctx, unsigned int height) +static void mixer_cfg_scan(struct mixer_context *ctx, int width, int height) { - struct mixer_resources *res = &ctx->mixer_res; u32 val; /* choosing between interlace and progressive mode */ val = test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? MXR_CFG_SCAN_INTERLACE : MXR_CFG_SCAN_PROGRESSIVE; - if (ctx->mxr_ver != MXR_VER_128_0_0_184) { - /* choosing between proper HD and SD mode */ - if (height <= 480) - val |= MXR_CFG_SCAN_NTSC | MXR_CFG_SCAN_SD; - else if (height <= 576) - val |= MXR_CFG_SCAN_PAL | MXR_CFG_SCAN_SD; - else if (height <= 720) - val |= MXR_CFG_SCAN_HD_720 | MXR_CFG_SCAN_HD; - else if (height <= 1080) - val |= MXR_CFG_SCAN_HD_1080 | MXR_CFG_SCAN_HD; - else - val |= MXR_CFG_SCAN_HD_720 | MXR_CFG_SCAN_HD; - } + if (ctx->mxr_ver == MXR_VER_128_0_0_184) + mixer_reg_write(ctx, MXR_RESOLUTION, + MXR_MXR_RES_HEIGHT(height) | MXR_MXR_RES_WIDTH(width)); + else + val |= ctx->scan_value; - mixer_reg_writemask(res, MXR_CFG, val, MXR_CFG_SCAN_MASK); + mixer_reg_writemask(ctx, MXR_CFG, val, MXR_CFG_SCAN_MASK); } static void mixer_cfg_rgb_fmt(struct mixer_context *ctx, unsigned int height) { - struct mixer_resources *res = &ctx->mixer_res; u32 val; switch (height) { @@ -408,45 +391,44 @@ static void mixer_cfg_rgb_fmt(struct mixer_context *ctx, unsigned int height) default: val = MXR_CFG_RGB709_16_235; /* Configure the BT.709 CSC matrix for full range RGB. 
*/ - mixer_reg_write(res, MXR_CM_COEFF_Y, + mixer_reg_write(ctx, MXR_CM_COEFF_Y, MXR_CSC_CT( 0.184, 0.614, 0.063) | MXR_CM_COEFF_RGB_FULL); - mixer_reg_write(res, MXR_CM_COEFF_CB, + mixer_reg_write(ctx, MXR_CM_COEFF_CB, MXR_CSC_CT(-0.102, -0.338, 0.440)); - mixer_reg_write(res, MXR_CM_COEFF_CR, + mixer_reg_write(ctx, MXR_CM_COEFF_CR, MXR_CSC_CT( 0.440, -0.399, -0.040)); break; } - mixer_reg_writemask(res, MXR_CFG, val, MXR_CFG_RGB_FMT_MASK); + mixer_reg_writemask(ctx, MXR_CFG, val, MXR_CFG_RGB_FMT_MASK); } static void mixer_cfg_layer(struct mixer_context *ctx, unsigned int win, unsigned int priority, bool enable) { - struct mixer_resources *res = &ctx->mixer_res; u32 val = enable ? ~0 : 0; switch (win) { case 0: - mixer_reg_writemask(res, MXR_CFG, val, MXR_CFG_GRP0_ENABLE); - mixer_reg_writemask(res, MXR_LAYER_CFG, + mixer_reg_writemask(ctx, MXR_CFG, val, MXR_CFG_GRP0_ENABLE); + mixer_reg_writemask(ctx, MXR_LAYER_CFG, MXR_LAYER_CFG_GRP0_VAL(priority), MXR_LAYER_CFG_GRP0_MASK); break; case 1: - mixer_reg_writemask(res, MXR_CFG, val, MXR_CFG_GRP1_ENABLE); - mixer_reg_writemask(res, MXR_LAYER_CFG, + mixer_reg_writemask(ctx, MXR_CFG, val, MXR_CFG_GRP1_ENABLE); + mixer_reg_writemask(ctx, MXR_LAYER_CFG, MXR_LAYER_CFG_GRP1_VAL(priority), MXR_LAYER_CFG_GRP1_MASK); break; case VP_DEFAULT_WIN: if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) { - vp_reg_writemask(res, VP_ENABLE, val, VP_ENABLE_ON); - mixer_reg_writemask(res, MXR_CFG, val, + vp_reg_writemask(ctx, VP_ENABLE, val, VP_ENABLE_ON); + mixer_reg_writemask(ctx, MXR_CFG, val, MXR_CFG_VP_ENABLE); - mixer_reg_writemask(res, MXR_LAYER_CFG, + mixer_reg_writemask(ctx, MXR_LAYER_CFG, MXR_LAYER_CFG_VP_VAL(priority), MXR_LAYER_CFG_VP_MASK); } @@ -456,30 +438,34 @@ static void mixer_cfg_layer(struct mixer_context *ctx, unsigned int win, static void mixer_run(struct mixer_context *ctx) { - struct mixer_resources *res = &ctx->mixer_res; - - mixer_reg_writemask(res, MXR_STATUS, ~0, MXR_STATUS_REG_RUN); + mixer_reg_writemask(ctx, MXR_STATUS, ~0, MXR_STATUS_REG_RUN); } static void mixer_stop(struct mixer_context *ctx) { - struct mixer_resources *res = &ctx->mixer_res; int timeout = 20; - mixer_reg_writemask(res, MXR_STATUS, 0, MXR_STATUS_REG_RUN); + mixer_reg_writemask(ctx, MXR_STATUS, 0, MXR_STATUS_REG_RUN); - while (!(mixer_reg_read(res, MXR_STATUS) & MXR_STATUS_REG_IDLE) && + while (!(mixer_reg_read(ctx, MXR_STATUS) & MXR_STATUS_REG_IDLE) && --timeout) usleep_range(10000, 12000); } +static void mixer_commit(struct mixer_context *ctx) +{ + struct drm_display_mode *mode = &ctx->crtc->base.state->adjusted_mode; + + mixer_cfg_scan(ctx, mode->hdisplay, mode->vdisplay); + mixer_cfg_rgb_fmt(ctx, mode->vdisplay); + mixer_run(ctx); +} + static void vp_video_buffer(struct mixer_context *ctx, struct exynos_drm_plane *plane) { struct exynos_drm_plane_state *state = to_exynos_plane_state(plane->base.state); - struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode; - struct mixer_resources *res = &ctx->mixer_res; struct drm_framebuffer *fb = state->base.fb; unsigned int priority = state->base.normalized_zpos + 1; unsigned long flags; @@ -493,8 +479,7 @@ static void vp_video_buffer(struct mixer_context *ctx, luma_addr[0] = exynos_drm_fb_dma_addr(fb, 0); chroma_addr[0] = exynos_drm_fb_dma_addr(fb, 1); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) { - __set_bit(MXR_BIT_INTERLACE, &ctx->flags); + if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { if (is_tiled) { luma_addr[1] = luma_addr[0] + 0x40; chroma_addr[1] = chroma_addr[0] + 0x40; @@ -503,63 +488,59 @@ 
static void vp_video_buffer(struct mixer_context *ctx, chroma_addr[1] = chroma_addr[0] + fb->pitches[0]; } } else { - __clear_bit(MXR_BIT_INTERLACE, &ctx->flags); luma_addr[1] = 0; chroma_addr[1] = 0; } - spin_lock_irqsave(&res->reg_slock, flags); + spin_lock_irqsave(&ctx->reg_slock, flags); /* interlace or progressive scan mode */ val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0); - vp_reg_writemask(res, VP_MODE, val, VP_MODE_LINE_SKIP); + vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP); /* setup format */ val = (is_nv21 ? VP_MODE_NV21 : VP_MODE_NV12); val |= (is_tiled ? VP_MODE_MEM_TILED : VP_MODE_MEM_LINEAR); - vp_reg_writemask(res, VP_MODE, val, VP_MODE_FMT_MASK); + vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_FMT_MASK); /* setting size of input image */ - vp_reg_write(res, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) | + vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) | VP_IMG_VSIZE(fb->height)); /* chroma plane for NV12/NV21 is half the height of the luma plane */ - vp_reg_write(res, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) | + vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) | VP_IMG_VSIZE(fb->height / 2)); - vp_reg_write(res, VP_SRC_WIDTH, state->src.w); - vp_reg_write(res, VP_SRC_HEIGHT, state->src.h); - vp_reg_write(res, VP_SRC_H_POSITION, + vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w); + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); + vp_reg_write(ctx, VP_SRC_H_POSITION, VP_SRC_H_POSITION_VAL(state->src.x)); - vp_reg_write(res, VP_SRC_V_POSITION, state->src.y); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); - vp_reg_write(res, VP_DST_WIDTH, state->crtc.w); - vp_reg_write(res, VP_DST_H_POSITION, state->crtc.x); + vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w); + vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x); if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { - vp_reg_write(res, VP_DST_HEIGHT, state->crtc.h / 2); - vp_reg_write(res, VP_DST_V_POSITION, state->crtc.y / 2); + vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2); + vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2); } else { - vp_reg_write(res, VP_DST_HEIGHT, state->crtc.h); - vp_reg_write(res, VP_DST_V_POSITION, state->crtc.y); + vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h); + vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y); } - vp_reg_write(res, VP_H_RATIO, state->h_ratio); - vp_reg_write(res, VP_V_RATIO, state->v_ratio); + vp_reg_write(ctx, VP_H_RATIO, state->h_ratio); + vp_reg_write(ctx, VP_V_RATIO, state->v_ratio); - vp_reg_write(res, VP_ENDIAN_MODE, VP_ENDIAN_MODE_LITTLE); + vp_reg_write(ctx, VP_ENDIAN_MODE, VP_ENDIAN_MODE_LITTLE); /* set buffer address to vp */ - vp_reg_write(res, VP_TOP_Y_PTR, luma_addr[0]); - vp_reg_write(res, VP_BOT_Y_PTR, luma_addr[1]); - vp_reg_write(res, VP_TOP_C_PTR, chroma_addr[0]); - vp_reg_write(res, VP_BOT_C_PTR, chroma_addr[1]); + vp_reg_write(ctx, VP_TOP_Y_PTR, luma_addr[0]); + vp_reg_write(ctx, VP_BOT_Y_PTR, luma_addr[1]); + vp_reg_write(ctx, VP_TOP_C_PTR, chroma_addr[0]); + vp_reg_write(ctx, VP_BOT_C_PTR, chroma_addr[1]); - mixer_cfg_scan(ctx, mode->vdisplay); - mixer_cfg_rgb_fmt(ctx, mode->vdisplay); mixer_cfg_layer(ctx, plane->index, priority, true); mixer_cfg_vp_blend(ctx); - mixer_run(ctx); - spin_unlock_irqrestore(&res->reg_slock, flags); + spin_unlock_irqrestore(&ctx->reg_slock, flags); mixer_regs_dump(ctx); vp_regs_dump(ctx); @@ -567,9 +548,7 @@ static void vp_video_buffer(struct mixer_context *ctx, static void mixer_layer_update(struct mixer_context *ctx) { - struct mixer_resources *res = &ctx->mixer_res; 
- - mixer_reg_writemask(res, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE); + mixer_reg_writemask(ctx, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE); } static void mixer_graph_buffer(struct mixer_context *ctx, @@ -577,8 +556,6 @@ static void mixer_graph_buffer(struct mixer_context *ctx, { struct exynos_drm_plane_state *state = to_exynos_plane_state(plane->base.state); - struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode; - struct mixer_resources *res = &ctx->mixer_res; struct drm_framebuffer *fb = state->base.fb; unsigned int priority = state->base.normalized_zpos + 1; unsigned long flags; @@ -623,45 +600,30 @@ static void mixer_graph_buffer(struct mixer_context *ctx, + (state->src.x * fb->format->cpp[0]) + (state->src.y * fb->pitches[0]); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - __set_bit(MXR_BIT_INTERLACE, &ctx->flags); - else - __clear_bit(MXR_BIT_INTERLACE, &ctx->flags); - - spin_lock_irqsave(&res->reg_slock, flags); + spin_lock_irqsave(&ctx->reg_slock, flags); /* setup format */ - mixer_reg_writemask(res, MXR_GRAPHIC_CFG(win), + mixer_reg_writemask(ctx, MXR_GRAPHIC_CFG(win), MXR_GRP_CFG_FORMAT_VAL(fmt), MXR_GRP_CFG_FORMAT_MASK); /* setup geometry */ - mixer_reg_write(res, MXR_GRAPHIC_SPAN(win), + mixer_reg_write(ctx, MXR_GRAPHIC_SPAN(win), fb->pitches[0] / fb->format->cpp[0]); - /* setup display size */ - if (ctx->mxr_ver == MXR_VER_128_0_0_184 && - win == DEFAULT_WIN) { - val = MXR_MXR_RES_HEIGHT(mode->vdisplay); - val |= MXR_MXR_RES_WIDTH(mode->hdisplay); - mixer_reg_write(res, MXR_RESOLUTION, val); - } - val = MXR_GRP_WH_WIDTH(state->src.w); val |= MXR_GRP_WH_HEIGHT(state->src.h); val |= MXR_GRP_WH_H_SCALE(x_ratio); val |= MXR_GRP_WH_V_SCALE(y_ratio); - mixer_reg_write(res, MXR_GRAPHIC_WH(win), val); + mixer_reg_write(ctx, MXR_GRAPHIC_WH(win), val); /* setup offsets in display image */ val = MXR_GRP_DXY_DX(dst_x_offset); val |= MXR_GRP_DXY_DY(dst_y_offset); - mixer_reg_write(res, MXR_GRAPHIC_DXY(win), val); + mixer_reg_write(ctx, MXR_GRAPHIC_DXY(win), val); /* set buffer address to mixer */ - mixer_reg_write(res, MXR_GRAPHIC_BASE(win), dma_addr); + mixer_reg_write(ctx, MXR_GRAPHIC_BASE(win), dma_addr); - mixer_cfg_scan(ctx, mode->vdisplay); - mixer_cfg_rgb_fmt(ctx, mode->vdisplay); mixer_cfg_layer(ctx, win, priority, true); mixer_cfg_gfx_blend(ctx, win, is_alpha_format(fb->format->format)); @@ -670,22 +632,19 @@ static void mixer_graph_buffer(struct mixer_context *ctx, ctx->mxr_ver == MXR_VER_128_0_0_184) mixer_layer_update(ctx); - mixer_run(ctx); - - spin_unlock_irqrestore(&res->reg_slock, flags); + spin_unlock_irqrestore(&ctx->reg_slock, flags); mixer_regs_dump(ctx); } static void vp_win_reset(struct mixer_context *ctx) { - struct mixer_resources *res = &ctx->mixer_res; unsigned int tries = 100; - vp_reg_write(res, VP_SRESET, VP_SRESET_PROCESSING); + vp_reg_write(ctx, VP_SRESET, VP_SRESET_PROCESSING); while (--tries) { /* waiting until VP_SRESET_PROCESSING is 0 */ - if (~vp_reg_read(res, VP_SRESET) & VP_SRESET_PROCESSING) + if (~vp_reg_read(ctx, VP_SRESET) & VP_SRESET_PROCESSING) break; mdelay(10); } @@ -694,57 +653,55 @@ static void vp_win_reset(struct mixer_context *ctx) static void mixer_win_reset(struct mixer_context *ctx) { - struct mixer_resources *res = &ctx->mixer_res; unsigned long flags; - spin_lock_irqsave(&res->reg_slock, flags); + spin_lock_irqsave(&ctx->reg_slock, flags); - mixer_reg_writemask(res, MXR_CFG, MXR_CFG_DST_HDMI, MXR_CFG_DST_MASK); + mixer_reg_writemask(ctx, MXR_CFG, MXR_CFG_DST_HDMI, MXR_CFG_DST_MASK); /* set output in RGB888 mode */ - 
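/*
 * Editorial sketch, not part of the patch: the bounded-poll idiom used by
 * vp_win_reset() above - retry a fixed number of times with a short delay
 * and report a timeout instead of spinning forever.
 */
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>

static int example_wait_bit_clear(void __iomem *regs, u32 reg, u32 bit)
{
	unsigned int tries = 100;

	while (--tries) {
		if (!(readl(regs + reg) & bit))
			return 0;
		mdelay(10);
	}

	return -ETIMEDOUT;
}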
mixer_reg_writemask(res, MXR_CFG, MXR_CFG_OUT_RGB888, MXR_CFG_OUT_MASK); + mixer_reg_writemask(ctx, MXR_CFG, MXR_CFG_OUT_RGB888, MXR_CFG_OUT_MASK); /* 16 beat burst in DMA */ - mixer_reg_writemask(res, MXR_STATUS, MXR_STATUS_16_BURST, + mixer_reg_writemask(ctx, MXR_STATUS, MXR_STATUS_16_BURST, MXR_STATUS_BURST_MASK); /* reset default layer priority */ - mixer_reg_write(res, MXR_LAYER_CFG, 0); + mixer_reg_write(ctx, MXR_LAYER_CFG, 0); /* set all background colors to RGB (0,0,0) */ - mixer_reg_write(res, MXR_BG_COLOR0, MXR_YCBCR_VAL(0, 128, 128)); - mixer_reg_write(res, MXR_BG_COLOR1, MXR_YCBCR_VAL(0, 128, 128)); - mixer_reg_write(res, MXR_BG_COLOR2, MXR_YCBCR_VAL(0, 128, 128)); + mixer_reg_write(ctx, MXR_BG_COLOR0, MXR_YCBCR_VAL(0, 128, 128)); + mixer_reg_write(ctx, MXR_BG_COLOR1, MXR_YCBCR_VAL(0, 128, 128)); + mixer_reg_write(ctx, MXR_BG_COLOR2, MXR_YCBCR_VAL(0, 128, 128)); if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) { /* configuration of Video Processor Registers */ vp_win_reset(ctx); - vp_default_filter(res); + vp_default_filter(ctx); } /* disable all layers */ - mixer_reg_writemask(res, MXR_CFG, 0, MXR_CFG_GRP0_ENABLE); - mixer_reg_writemask(res, MXR_CFG, 0, MXR_CFG_GRP1_ENABLE); + mixer_reg_writemask(ctx, MXR_CFG, 0, MXR_CFG_GRP0_ENABLE); + mixer_reg_writemask(ctx, MXR_CFG, 0, MXR_CFG_GRP1_ENABLE); if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) - mixer_reg_writemask(res, MXR_CFG, 0, MXR_CFG_VP_ENABLE); + mixer_reg_writemask(ctx, MXR_CFG, 0, MXR_CFG_VP_ENABLE); /* set all source image offsets to zero */ - mixer_reg_write(res, MXR_GRAPHIC_SXY(0), 0); - mixer_reg_write(res, MXR_GRAPHIC_SXY(1), 0); + mixer_reg_write(ctx, MXR_GRAPHIC_SXY(0), 0); + mixer_reg_write(ctx, MXR_GRAPHIC_SXY(1), 0); - spin_unlock_irqrestore(&res->reg_slock, flags); + spin_unlock_irqrestore(&ctx->reg_slock, flags); } static irqreturn_t mixer_irq_handler(int irq, void *arg) { struct mixer_context *ctx = arg; - struct mixer_resources *res = &ctx->mixer_res; u32 val, base, shadow; - spin_lock(&res->reg_slock); + spin_lock(&ctx->reg_slock); /* read interrupt status for handling and clearing flags for VSYNC */ - val = mixer_reg_read(res, MXR_INT_STATUS); + val = mixer_reg_read(ctx, MXR_INT_STATUS); /* handling VSYNC */ if (val & MXR_INT_STATUS_VSYNC) { @@ -754,13 +711,13 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) /* interlace scan need to check shadow register */ if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { - base = mixer_reg_read(res, MXR_GRAPHIC_BASE(0)); - shadow = mixer_reg_read(res, MXR_GRAPHIC_BASE_S(0)); + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); + shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); if (base != shadow) goto out; - base = mixer_reg_read(res, MXR_GRAPHIC_BASE(1)); - shadow = mixer_reg_read(res, MXR_GRAPHIC_BASE_S(1)); + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(1)); + shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(1)); if (base != shadow) goto out; } @@ -770,9 +727,9 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) out: /* clear interrupts */ - mixer_reg_write(res, MXR_INT_STATUS, val); + mixer_reg_write(ctx, MXR_INT_STATUS, val); - spin_unlock(&res->reg_slock); + spin_unlock(&ctx->reg_slock); return IRQ_HANDLED; } @@ -780,26 +737,25 @@ out: static int mixer_resources_init(struct mixer_context *mixer_ctx) { struct device *dev = &mixer_ctx->pdev->dev; - struct mixer_resources *mixer_res = &mixer_ctx->mixer_res; struct resource *res; int ret; - spin_lock_init(&mixer_res->reg_slock); + spin_lock_init(&mixer_ctx->reg_slock); - mixer_res->mixer = 
devm_clk_get(dev, "mixer"); - if (IS_ERR(mixer_res->mixer)) { + mixer_ctx->mixer = devm_clk_get(dev, "mixer"); + if (IS_ERR(mixer_ctx->mixer)) { dev_err(dev, "failed to get clock 'mixer'\n"); return -ENODEV; } - mixer_res->hdmi = devm_clk_get(dev, "hdmi"); - if (IS_ERR(mixer_res->hdmi)) { + mixer_ctx->hdmi = devm_clk_get(dev, "hdmi"); + if (IS_ERR(mixer_ctx->hdmi)) { dev_err(dev, "failed to get clock 'hdmi'\n"); - return PTR_ERR(mixer_res->hdmi); + return PTR_ERR(mixer_ctx->hdmi); } - mixer_res->sclk_hdmi = devm_clk_get(dev, "sclk_hdmi"); - if (IS_ERR(mixer_res->sclk_hdmi)) { + mixer_ctx->sclk_hdmi = devm_clk_get(dev, "sclk_hdmi"); + if (IS_ERR(mixer_ctx->sclk_hdmi)) { dev_err(dev, "failed to get clock 'sclk_hdmi'\n"); return -ENODEV; } @@ -809,9 +765,9 @@ static int mixer_resources_init(struct mixer_context *mixer_ctx) return -ENXIO; } - mixer_res->mixer_regs = devm_ioremap(dev, res->start, + mixer_ctx->mixer_regs = devm_ioremap(dev, res->start, resource_size(res)); - if (mixer_res->mixer_regs == NULL) { + if (mixer_ctx->mixer_regs == NULL) { dev_err(dev, "register mapping failed.\n"); return -ENXIO; } @@ -828,7 +784,7 @@ static int mixer_resources_init(struct mixer_context *mixer_ctx) dev_err(dev, "request interrupt failed.\n"); return ret; } - mixer_res->irq = res->start; + mixer_ctx->irq = res->start; return 0; } @@ -836,30 +792,29 @@ static int mixer_resources_init(struct mixer_context *mixer_ctx) static int vp_resources_init(struct mixer_context *mixer_ctx) { struct device *dev = &mixer_ctx->pdev->dev; - struct mixer_resources *mixer_res = &mixer_ctx->mixer_res; struct resource *res; - mixer_res->vp = devm_clk_get(dev, "vp"); - if (IS_ERR(mixer_res->vp)) { + mixer_ctx->vp = devm_clk_get(dev, "vp"); + if (IS_ERR(mixer_ctx->vp)) { dev_err(dev, "failed to get clock 'vp'\n"); return -ENODEV; } if (test_bit(MXR_BIT_HAS_SCLK, &mixer_ctx->flags)) { - mixer_res->sclk_mixer = devm_clk_get(dev, "sclk_mixer"); - if (IS_ERR(mixer_res->sclk_mixer)) { + mixer_ctx->sclk_mixer = devm_clk_get(dev, "sclk_mixer"); + if (IS_ERR(mixer_ctx->sclk_mixer)) { dev_err(dev, "failed to get clock 'sclk_mixer'\n"); return -ENODEV; } - mixer_res->mout_mixer = devm_clk_get(dev, "mout_mixer"); - if (IS_ERR(mixer_res->mout_mixer)) { + mixer_ctx->mout_mixer = devm_clk_get(dev, "mout_mixer"); + if (IS_ERR(mixer_ctx->mout_mixer)) { dev_err(dev, "failed to get clock 'mout_mixer'\n"); return -ENODEV; } - if (mixer_res->sclk_hdmi && mixer_res->mout_mixer) - clk_set_parent(mixer_res->mout_mixer, - mixer_res->sclk_hdmi); + if (mixer_ctx->sclk_hdmi && mixer_ctx->mout_mixer) + clk_set_parent(mixer_ctx->mout_mixer, + mixer_ctx->sclk_hdmi); } res = platform_get_resource(mixer_ctx->pdev, IORESOURCE_MEM, 1); @@ -868,9 +823,9 @@ static int vp_resources_init(struct mixer_context *mixer_ctx) return -ENXIO; } - mixer_res->vp_regs = devm_ioremap(dev, res->start, + mixer_ctx->vp_regs = devm_ioremap(dev, res->start, resource_size(res)); - if (mixer_res->vp_regs == NULL) { + if (mixer_ctx->vp_regs == NULL) { dev_err(dev, "register mapping failed.\n"); return -ENXIO; } @@ -914,15 +869,14 @@ static void mixer_ctx_remove(struct mixer_context *mixer_ctx) static int mixer_enable_vblank(struct exynos_drm_crtc *crtc) { struct mixer_context *mixer_ctx = crtc->ctx; - struct mixer_resources *res = &mixer_ctx->mixer_res; __set_bit(MXR_BIT_VSYNC, &mixer_ctx->flags); if (!test_bit(MXR_BIT_POWERED, &mixer_ctx->flags)) return 0; /* enable vsync interrupt */ - mixer_reg_writemask(res, MXR_INT_STATUS, ~0, MXR_INT_CLEAR_VSYNC); - mixer_reg_writemask(res, 
MXR_INT_EN, ~0, MXR_INT_EN_VSYNC); + mixer_reg_writemask(mixer_ctx, MXR_INT_STATUS, ~0, MXR_INT_CLEAR_VSYNC); + mixer_reg_writemask(mixer_ctx, MXR_INT_EN, ~0, MXR_INT_EN_VSYNC); return 0; } @@ -930,7 +884,6 @@ static int mixer_enable_vblank(struct exynos_drm_crtc *crtc) static void mixer_disable_vblank(struct exynos_drm_crtc *crtc) { struct mixer_context *mixer_ctx = crtc->ctx; - struct mixer_resources *res = &mixer_ctx->mixer_res; __clear_bit(MXR_BIT_VSYNC, &mixer_ctx->flags); @@ -938,8 +891,8 @@ static void mixer_disable_vblank(struct exynos_drm_crtc *crtc) return; /* disable vsync interrupt */ - mixer_reg_writemask(res, MXR_INT_STATUS, ~0, MXR_INT_CLEAR_VSYNC); - mixer_reg_writemask(res, MXR_INT_EN, 0, MXR_INT_EN_VSYNC); + mixer_reg_writemask(mixer_ctx, MXR_INT_STATUS, ~0, MXR_INT_CLEAR_VSYNC); + mixer_reg_writemask(mixer_ctx, MXR_INT_EN, 0, MXR_INT_EN_VSYNC); } static void mixer_atomic_begin(struct exynos_drm_crtc *crtc) @@ -972,7 +925,6 @@ static void mixer_disable_plane(struct exynos_drm_crtc *crtc, struct exynos_drm_plane *plane) { struct mixer_context *mixer_ctx = crtc->ctx; - struct mixer_resources *res = &mixer_ctx->mixer_res; unsigned long flags; DRM_DEBUG_KMS("win: %d\n", plane->index); @@ -980,9 +932,9 @@ static void mixer_disable_plane(struct exynos_drm_crtc *crtc, if (!test_bit(MXR_BIT_POWERED, &mixer_ctx->flags)) return; - spin_lock_irqsave(&res->reg_slock, flags); + spin_lock_irqsave(&mixer_ctx->reg_slock, flags); mixer_cfg_layer(mixer_ctx, plane->index, 0, false); - spin_unlock_irqrestore(&res->reg_slock, flags); + spin_unlock_irqrestore(&mixer_ctx->reg_slock, flags); } static void mixer_atomic_flush(struct exynos_drm_crtc *crtc) @@ -999,7 +951,6 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc) static void mixer_enable(struct exynos_drm_crtc *crtc) { struct mixer_context *ctx = crtc->ctx; - struct mixer_resources *res = &ctx->mixer_res; if (test_bit(MXR_BIT_POWERED, &ctx->flags)) return; @@ -1010,14 +961,17 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) mixer_vsync_set_update(ctx, false); - mixer_reg_writemask(res, MXR_STATUS, ~0, MXR_STATUS_SOFT_RESET); + mixer_reg_writemask(ctx, MXR_STATUS, ~0, MXR_STATUS_SOFT_RESET); if (test_bit(MXR_BIT_VSYNC, &ctx->flags)) { - mixer_reg_writemask(res, MXR_INT_STATUS, ~0, MXR_INT_CLEAR_VSYNC); - mixer_reg_writemask(res, MXR_INT_EN, ~0, MXR_INT_EN_VSYNC); + mixer_reg_writemask(ctx, MXR_INT_STATUS, ~0, + MXR_INT_CLEAR_VSYNC); + mixer_reg_writemask(ctx, MXR_INT_EN, ~0, MXR_INT_EN_VSYNC); } mixer_win_reset(ctx); + mixer_commit(ctx); + mixer_vsync_set_update(ctx, true); set_bit(MXR_BIT_POWERED, &ctx->flags); @@ -1044,26 +998,75 @@ static void mixer_disable(struct exynos_drm_crtc *crtc) clear_bit(MXR_BIT_POWERED, &ctx->flags); } -/* Only valid for Mixer version 16.0.33.0 */ -static int mixer_atomic_check(struct exynos_drm_crtc *crtc, - struct drm_crtc_state *state) +static int mixer_mode_valid(struct exynos_drm_crtc *crtc, + const struct drm_display_mode *mode) { - struct drm_display_mode *mode = &state->adjusted_mode; - u32 w, h; + struct mixer_context *ctx = crtc->ctx; + u32 w = mode->hdisplay, h = mode->vdisplay; - w = mode->hdisplay; - h = mode->vdisplay; + DRM_DEBUG_KMS("xres=%d, yres=%d, refresh=%d, intl=%d\n", w, h, + mode->vrefresh, !!(mode->flags & DRM_MODE_FLAG_INTERLACE)); - DRM_DEBUG_KMS("xres=%d, yres=%d, refresh=%d, intl=%d\n", - mode->hdisplay, mode->vdisplay, mode->vrefresh, - (mode->flags & DRM_MODE_FLAG_INTERLACE) ? 
1 : 0); + if (ctx->mxr_ver == MXR_VER_128_0_0_184) + return MODE_OK; if ((w >= 464 && w <= 720 && h >= 261 && h <= 576) || - (w >= 1024 && w <= 1280 && h >= 576 && h <= 720) || - (w >= 1664 && w <= 1920 && h >= 936 && h <= 1080)) - return 0; + (w >= 1024 && w <= 1280 && h >= 576 && h <= 720) || + (w >= 1664 && w <= 1920 && h >= 936 && h <= 1080)) + return MODE_OK; + + if ((w == 1024 && h == 768) || + (w == 1366 && h == 768) || + (w == 1280 && h == 1024)) + return MODE_OK; + + return MODE_BAD; +} + +static bool mixer_mode_fixup(struct exynos_drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct mixer_context *ctx = crtc->ctx; + int width = mode->hdisplay, height = mode->vdisplay, i; + + struct { + int hdisplay, vdisplay, htotal, vtotal, scan_val; + } static const modes[] = { + { 720, 480, 858, 525, MXR_CFG_SCAN_NTSC | MXR_CFG_SCAN_SD }, + { 720, 576, 864, 625, MXR_CFG_SCAN_PAL | MXR_CFG_SCAN_SD }, + { 1280, 720, 1650, 750, MXR_CFG_SCAN_HD_720 | MXR_CFG_SCAN_HD }, + { 1920, 1080, 2200, 1125, MXR_CFG_SCAN_HD_1080 | + MXR_CFG_SCAN_HD } + }; + + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + __set_bit(MXR_BIT_INTERLACE, &ctx->flags); + else + __clear_bit(MXR_BIT_INTERLACE, &ctx->flags); + + if (ctx->mxr_ver == MXR_VER_128_0_0_184) + return true; + + for (i = 0; i < ARRAY_SIZE(modes); ++i) + if (width <= modes[i].hdisplay && height <= modes[i].vdisplay) { + ctx->scan_value = modes[i].scan_val; + if (width < modes[i].hdisplay || + height < modes[i].vdisplay) { + adjusted_mode->hdisplay = modes[i].hdisplay; + adjusted_mode->hsync_start = modes[i].hdisplay; + adjusted_mode->hsync_end = modes[i].htotal; + adjusted_mode->htotal = modes[i].htotal; + adjusted_mode->vdisplay = modes[i].vdisplay; + adjusted_mode->vsync_start = modes[i].vdisplay; + adjusted_mode->vsync_end = modes[i].vtotal; + adjusted_mode->vtotal = modes[i].vtotal; + } + + return true; + } - return -EINVAL; + return false; } static const struct exynos_drm_crtc_ops mixer_crtc_ops = { @@ -1075,7 +1078,8 @@ static const struct exynos_drm_crtc_ops mixer_crtc_ops = { .update_plane = mixer_update_plane, .disable_plane = mixer_disable_plane, .atomic_flush = mixer_atomic_flush, - .atomic_check = mixer_atomic_check, + .mode_valid = mixer_mode_valid, + .mode_fixup = mixer_mode_fixup, }; static const struct mixer_drv_data exynos5420_mxr_drv_data = { @@ -1217,14 +1221,13 @@ static int mixer_remove(struct platform_device *pdev) static int __maybe_unused exynos_mixer_suspend(struct device *dev) { struct mixer_context *ctx = dev_get_drvdata(dev); - struct mixer_resources *res = &ctx->mixer_res; - clk_disable_unprepare(res->hdmi); - clk_disable_unprepare(res->mixer); + clk_disable_unprepare(ctx->hdmi); + clk_disable_unprepare(ctx->mixer); if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) { - clk_disable_unprepare(res->vp); + clk_disable_unprepare(ctx->vp); if (test_bit(MXR_BIT_HAS_SCLK, &ctx->flags)) - clk_disable_unprepare(res->sclk_mixer); + clk_disable_unprepare(ctx->sclk_mixer); } return 0; @@ -1233,28 +1236,27 @@ static int __maybe_unused exynos_mixer_suspend(struct device *dev) static int __maybe_unused exynos_mixer_resume(struct device *dev) { struct mixer_context *ctx = dev_get_drvdata(dev); - struct mixer_resources *res = &ctx->mixer_res; int ret; - ret = clk_prepare_enable(res->mixer); + ret = clk_prepare_enable(ctx->mixer); if (ret < 0) { DRM_ERROR("Failed to prepare_enable the mixer clk [%d]\n", ret); return ret; } - ret = clk_prepare_enable(res->hdmi); + ret = clk_prepare_enable(ctx->hdmi); 
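/*
 * Editorial sketch, not part of the patch: what the new mixer_mode_fixup()
 * produces for a requested mode smaller than the nearest supported scan
 * configuration, e.g. 1024x768 mapped onto the { 1280, 720, 1650, 750 }
 * table entry - the active size and totals of the adjusted mode are replaced,
 * while the HDMI side later compensates through the hquirk offset.
 */
#include <drm/drm_modes.h>

static void example_stretch_to_hd720(struct drm_display_mode *adj)
{
	adj->hdisplay = 1280;
	adj->hsync_start = 1280;
	adj->hsync_end = 1650;
	adj->htotal = 1650;
	adj->vdisplay = 720;
	adj->vsync_start = 720;
	adj->vsync_end = 750;
	adj->vtotal = 750;
}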
if (ret < 0) { DRM_ERROR("Failed to prepare_enable the hdmi clk [%d]\n", ret); return ret; } if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) { - ret = clk_prepare_enable(res->vp); + ret = clk_prepare_enable(ctx->vp); if (ret < 0) { DRM_ERROR("Failed to prepare_enable the vp clk [%d]\n", ret); return ret; } if (test_bit(MXR_BIT_HAS_SCLK, &ctx->flags)) { - ret = clk_prepare_enable(res->sclk_mixer); + ret = clk_prepare_enable(ctx->sclk_mixer); if (ret < 0) { DRM_ERROR("Failed to prepare_enable the " \ "sclk_mixer clk [%d]\n", diff --git a/drivers/gpu/drm/exynos/regs-hdmi.h b/drivers/gpu/drm/exynos/regs-hdmi.h index a0507dc18d9e..04be0f7e8193 100644 --- a/drivers/gpu/drm/exynos/regs-hdmi.h +++ b/drivers/gpu/drm/exynos/regs-hdmi.h @@ -419,11 +419,9 @@ #define HDMI_I2S_DSD_CON HDMI_I2S_BASE(0x01c) #define HDMI_I2S_MUX_CON HDMI_I2S_BASE(0x020) #define HDMI_I2S_CH_ST_CON HDMI_I2S_BASE(0x024) -#define HDMI_I2S_CH_ST_0 HDMI_I2S_BASE(0x028) -#define HDMI_I2S_CH_ST_1 HDMI_I2S_BASE(0x02c) -#define HDMI_I2S_CH_ST_2 HDMI_I2S_BASE(0x030) -#define HDMI_I2S_CH_ST_3 HDMI_I2S_BASE(0x034) -#define HDMI_I2S_CH_ST_4 HDMI_I2S_BASE(0x038) +/* n must be within range 0...(HDMI_I2S_CH_ST_MAXNUM - 1) */ +#define HDMI_I2S_CH_ST_MAXNUM 5 +#define HDMI_I2S_CH_ST(n) HDMI_I2S_BASE(0x028 + 4 * (n)) #define HDMI_I2S_CH_ST_SH_0 HDMI_I2S_BASE(0x03c) #define HDMI_I2S_CH_ST_SH_1 HDMI_I2S_BASE(0x040) #define HDMI_I2S_CH_ST_SH_2 HDMI_I2S_BASE(0x044) diff --git a/drivers/gpu/drm/fsl-dcu/Makefile b/drivers/gpu/drm/fsl-dcu/Makefile index aca34f656bea..b55c4482d0f9 100644 --- a/drivers/gpu/drm/fsl-dcu/Makefile +++ b/drivers/gpu/drm/fsl-dcu/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 fsl-dcu-drm-y := fsl_dcu_drm_drv.o \ fsl_dcu_drm_kms.o \ fsl_dcu_drm_rgb.o \ diff --git a/drivers/gpu/drm/gma500/Makefile b/drivers/gpu/drm/gma500/Makefile index c1c8dc18aa53..c8f2c89be99d 100644 --- a/drivers/gpu/drm/gma500/Makefile +++ b/drivers/gpu/drm/gma500/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # KMS driver for the GMA500 # diff --git a/drivers/gpu/drm/gma500/psb_lid.c b/drivers/gpu/drm/gma500/psb_lid.c index 1d2ebb5e530f..be6dda58fcae 100644 --- a/drivers/gpu/drm/gma500/psb_lid.c +++ b/drivers/gpu/drm/gma500/psb_lid.c @@ -23,9 +23,9 @@ #include "psb_intel_reg.h" #include <linux/spinlock.h> -static void psb_lid_timer_func(unsigned long data) +static void psb_lid_timer_func(struct timer_list *t) { - struct drm_psb_private * dev_priv = (struct drm_psb_private *)data; + struct drm_psb_private *dev_priv = from_timer(dev_priv, t, lid_timer); struct drm_device *dev = (struct drm_device *)dev_priv->dev; struct timer_list *lid_timer = &dev_priv->lid_timer; unsigned long irq_flags; @@ -77,10 +77,8 @@ void psb_lid_timer_init(struct drm_psb_private *dev_priv) spin_lock_init(&dev_priv->lid_lock); spin_lock_irqsave(&dev_priv->lid_lock, irq_flags); - init_timer(lid_timer); + timer_setup(lid_timer, psb_lid_timer_func, 0); - lid_timer->data = (unsigned long)dev_priv; - lid_timer->function = psb_lid_timer_func; lid_timer->expires = jiffies + PSB_LID_DELAY; add_timer(lid_timer); diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 9823477b1855..2269be91f3e1 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -534,9 +534,12 @@ static void ade_crtc_atomic_begin(struct drm_crtc *crtc, { struct ade_crtc *acrtc = to_ade_crtc(crtc); struct ade_hw_ctx *ctx = acrtc->ctx; + struct drm_display_mode *mode = 
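/*
 * Editorial sketch, not part of the patch: the timer API conversion applied
 * to psb_lid.c - timer_setup() replaces init_timer() plus the manual
 * ->data/->function assignments, and the callback recovers its container
 * with from_timer() instead of casting an unsigned long.
 */
#include <linux/jiffies.h>
#include <linux/timer.h>

struct example_priv {
	struct timer_list lid_timer;
};

static void example_timer_func(struct timer_list *t)
{
	struct example_priv *priv = from_timer(priv, t, lid_timer);

	(void)priv;	/* the embedding structure, recovered type-safely */
}

static void example_timer_start(struct example_priv *priv)
{
	timer_setup(&priv->lid_timer, example_timer_func, 0);
	mod_timer(&priv->lid_timer, jiffies + HZ / 10);
}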
&crtc->state->mode; + struct drm_display_mode *adj_mode = &crtc->state->adjusted_mode; if (!ctx->power_on) (void)ade_power_up(ctx); + ade_ldi_set_mode(acrtc, mode, adj_mode); } static void ade_crtc_atomic_flush(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile index a77acfc1852e..b20100c18ffb 100644 --- a/drivers/gpu/drm/i2c/Makefile +++ b/drivers/gpu/drm/i2c/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 ch7006-y := ch7006_drv.o ch7006_mode.o obj-$(CONFIG_DRM_I2C_CH7006) += ch7006.o diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5182e3d5557d..2acf3b3c5f9d 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. @@ -47,6 +48,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_tiling.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ + i915_gemfs.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -59,9 +61,11 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ + intel_uc_fw.o \ + intel_guc.o \ intel_guc_ct.o \ intel_guc_log.o \ - intel_guc_loader.o \ + intel_guc_fw.o \ intel_huc.o \ i915_guc_submission.o diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index f5486cb94818..2641ba510a61 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 2c0ccbb817dc..701a3c6f1669 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2734,6 +2734,9 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) uint32_t per_ctx_start[CACHELINE_DWORDS] = {0}; unsigned char *bb_start_sva; + if (!wa_ctx->per_ctx.valid) + return 0; + per_ctx_start[0] = 0x18800001; per_ctx_start[1] = wa_ctx->per_ctx.guest_gma; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 5ec07ecf33ad..4427be18e4a9 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -734,8 +734,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, CACHELINE_BYTES; workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; - - WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1)); + workload->wa_ctx.per_ctx.valid = per_ctx & 1; } if (emulate_schedule_in) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 2294466dd415..a5bed2e71b92 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1429,18 +1429,7 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } -static int ring_timestamp_mmio_read(struct intel_vgpu *vgpu, - unsigned int offset, void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - - mmio_hw_access_pre(dev_priv); - vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); - mmio_hw_access_post(dev_priv); - return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); -} - -static int 
instdone_mmio_read(struct intel_vgpu *vgpu, +static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -1589,6 +1578,8 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \ + if (HAS_BSD2(dev_priv)) \ + MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \ } while (0) #define MMIO_RING_D(prefix, d) \ @@ -1635,10 +1626,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x6c) - MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_ALL, instdone_mmio_read, NULL); + MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL); #undef RING_REG - MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, instdone_mmio_read, NULL); + MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL); MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); @@ -1648,7 +1638,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, mmio_read_from_hw, NULL); MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ @@ -1662,9 +1652,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, @@ -2411,9 +2401,6 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, - intel_vgpu_reg_imr_handler); - MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IER(0), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler); MMIO_DH(GEN8_GT_IIR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler); @@ -2476,68 +2463,34 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), 
D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); - - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, + mmio_read_from_hw, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, ring_reset_ctl_write); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, - ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, - ring_reset_ctl_write); #undef RING_REG #define RING_REG(base) (base + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, elsp_mmio_write); #undef RING_REG #define RING_REG(base) (base + 0x234) MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, - ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 48, F_RO, 0, ~0, D_BDW_PLUS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x3a0) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, NULL, NULL); #undef RING_REG MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS); @@ -2557,11 +2510,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #define RING_REG(base) (base + 0x270) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2849,7 +2800,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL | D_KBL); MMIO_D(0x320f0, D_SKL | D_KBL); - MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL_PLUS); MMIO_D(0x71034, D_SKL_PLUS); MMIO_D(0x72034, D_SKL_PLUS); diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index fbd023a16f18..7d01c77a0f7a 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -54,9 +54,6 @@ #define VGT_SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B) -#define _REG_VECS_EXCC 0x1A028 -#define _REG_VCS2_EXCC 0x1c028 - #define _REG_701C0(pipe, plane) (0x701c0 + pipe * 0x1000 + (plane - 1) * 0x100) #define _REG_701C4(pipe, plane) (0x701c4 + pipe * 0x1000 + (plane - 1) * 0x100) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 436377da41ba..03532dfc0cd5 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -308,20 +308,8 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) { - struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler; - int ring_id; - 
kfree(vgpu->sched_data); vgpu->sched_data = NULL; - - spin_lock_bh(&scheduler->mmio_context_lock); - for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { - if (scheduler->engine_owner[ring_id] == vgpu) { - intel_gvt_switch_mmio(vgpu, NULL, ring_id); - scheduler->engine_owner[ring_id] = NULL; - } - } - spin_unlock_bh(&scheduler->mmio_context_lock); } static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) @@ -388,6 +376,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) { struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler; + int ring_id; gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id); @@ -401,4 +390,13 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) scheduler->need_reschedule = true; scheduler->current_vgpu = NULL; } + + spin_lock_bh(&scheduler->mmio_context_lock); + for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { + if (scheduler->engine_owner[ring_id] == vgpu) { + intel_gvt_switch_mmio(vgpu, NULL, ring_id); + scheduler->engine_owner[ring_id] = NULL; + } + } + spin_unlock_bh(&scheduler->mmio_context_lock); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d5892d24f0b6..f6ded475bb2c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -174,6 +174,7 @@ static int shadow_context_status_change(struct notifier_block *nb, atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: atomic_set(&workload->shadow_ctx_active, 0); break; default: diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index f36b85fd6d01..2d694f6c0907 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -68,6 +68,7 @@ struct shadow_indirect_ctx { struct shadow_per_ctx { unsigned long guest_gma; unsigned long shadow_gma; + unsigned valid; }; struct intel_shadow_wa_ctx { diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b4a6ac60e7c6..c65e381b85f3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -30,6 +30,7 @@ #include <linux/sort.h> #include <linux/sched/mm.h> #include "intel_drv.h" +#include "i915_guc_submission.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { @@ -82,7 +83,7 @@ static char get_active_flag(struct drm_i915_gem_object *obj) static char get_pin_flag(struct drm_i915_gem_object *obj) { - return obj->pin_display ? 'p' : ' '; + return obj->pin_global ? 'p' : ' '; } static char get_tiling_flag(struct drm_i915_gem_object *obj) @@ -97,7 +98,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return !list_empty(&obj->userfault_link) ? 'g' : ' '; + return obj->userfault_count ? 
'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) @@ -118,6 +119,36 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) return size; } +static const char * +stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len) +{ + size_t x = 0; + + switch (page_sizes) { + case 0: + return ""; + case I915_GTT_PAGE_SIZE_4K: + return "4K"; + case I915_GTT_PAGE_SIZE_64K: + return "64K"; + case I915_GTT_PAGE_SIZE_2M: + return "2M"; + default: + if (!buf) + return "M"; + + if (page_sizes & I915_GTT_PAGE_SIZE_2M) + x += snprintf(buf + x, len - x, "2M, "); + if (page_sizes & I915_GTT_PAGE_SIZE_64K) + x += snprintf(buf + x, len - x, "64K, "); + if (page_sizes & I915_GTT_PAGE_SIZE_4K) + x += snprintf(buf + x, len - x, "4K, "); + buf[x-2] = '\0'; + + return buf; + } +} + static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { @@ -149,15 +180,16 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); - if (obj->pin_display) - seq_printf(m, " (display)"); + if (obj->pin_global) + seq_printf(m, " (global)"); list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; - seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s", i915_vma_is_ggtt(vma) ? "g" : "pp", - vma->node.start, vma->node.size); + vma->node.start, vma->node.size, + stringify_page_sizes(vma->page_sizes.gtt, NULL, 0)); if (i915_vma_is_ggtt(vma)) { switch (vma->ggtt_view.type) { case I915_GGTT_VIEW_NORMAL: @@ -239,7 +271,9 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) goto out; total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (count == total) break; @@ -251,7 +285,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) total_gtt_size += i915_gem_obj_total_ggtt_size(obj); } - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { if (count == total) break; @@ -261,6 +295,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) objects[count++] = obj; total_obj_size += obj->base.size; } + spin_unlock(&dev_priv->mm.obj_lock); sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL); @@ -402,10 +437,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 count, mapped_count, purgeable_count, dpy_count; - u64 size, mapped_size, purgeable_size, dpy_size; + u32 count, mapped_count, purgeable_count, dpy_count, huge_count; + u64 size, mapped_size, purgeable_size, dpy_size, huge_size; struct drm_i915_gem_object *obj; + unsigned int page_sizes = 0; struct drm_file *file; + char buf[80]; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -419,7 +456,10 @@ static int i915_gem_object_info(struct seq_file *m, void *data) size = count = 0; mapped_size = mapped_count = 0; purgeable_size = purgeable_count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + huge_size = huge_count = 0; + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { size += 
obj->base.size; ++count; @@ -432,15 +472,21 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); size = count = dpy_size = dpy_count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { size += obj->base.size; ++count; - if (obj->pin_display) { + if (obj->pin_global) { dpy_size += obj->base.size; ++dpy_count; } @@ -454,18 +500,33 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } + spin_unlock(&dev_priv->mm.obj_lock); + seq_printf(m, "%u bound objects, %llu bytes\n", count, size); seq_printf(m, "%u purgeable objects, %llu bytes\n", purgeable_count, purgeable_size); seq_printf(m, "%u mapped objects, %llu bytes\n", mapped_count, mapped_size); - seq_printf(m, "%u display objects (pinned), %llu bytes\n", + seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n", + huge_count, + stringify_page_sizes(page_sizes, buf, sizeof(buf)), + huge_size); + seq_printf(m, "%u display objects (globally pinned), %llu bytes\n", dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", ggtt->base.total, ggtt->mappable_end); + seq_printf(m, "Supported page sizes: %s\n", + stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, + buf, sizeof(buf))); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); @@ -514,32 +575,46 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct drm_i915_private *dev_priv = node_to_i915(node); struct drm_device *dev = &dev_priv->drm; - bool show_pin_display_only = !!node->info_ent->data; + struct drm_i915_gem_object **objects; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; + unsigned long nobject, n; int count, ret; + nobject = READ_ONCE(dev_priv->mm.object_count); + objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL); + if (!objects) + return -ENOMEM; + ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) return ret; - total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (show_pin_display_only && !obj->pin_display) - continue; + count = 0; + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { + objects[count++] = obj; + if (count == nobject) + break; + } + spin_unlock(&dev_priv->mm.obj_lock); + + total_obj_size = total_gtt_size = 0; + for (n = 0; n < count; n++) { + obj = objects[n]; seq_puts(m, " "); describe_obj(m, obj); seq_putc(m, '\n'); total_obj_size += obj->base.size; total_gtt_size += i915_gem_obj_total_ggtt_size(obj); - count++; } mutex_unlock(&dev->struct_mutex); seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", count, total_obj_size, total_gtt_size); + kvfree(objects); return 0; } @@ -589,54 +664,6 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) return 0; } -static void print_request(struct seq_file *m, - struct drm_i915_gem_request *rq, - const char *prefix) -{ - seq_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, - rq->global_seqno, rq->ctx->hw_id, 
rq->fence.seqno, - rq->priotree.priority, - jiffies_to_msecs(jiffies - rq->emitted_jiffies), - rq->timeline->common->name); -} - -static int i915_gem_request_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct drm_i915_gem_request *req; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret, any; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - any = 0; - for_each_engine(engine, dev_priv, id) { - int count; - - count = 0; - list_for_each_entry(req, &engine->timeline->requests, link) - count++; - if (count == 0) - continue; - - seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->timeline->requests, link) - print_request(m, req, " "); - - any++; - } - mutex_unlock(&dev->struct_mutex); - - if (any == 0) - seq_puts(m, "No requests\n"); - - return 0; -} - static void i915_ring_seqno_info(struct seq_file *m, struct intel_engine_cs *engine) { @@ -1026,6 +1053,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1041,9 +1069,19 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 freq_sts; + u32 rpmodectl, freq_sts; + + mutex_lock(&dev_priv->pcu_lock); + + rpmodectl = I915_READ(GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); - mutex_lock(&dev_priv->rps.hw_lock); freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); @@ -1052,21 +1090,21 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); - mutex_unlock(&dev_priv->rps.hw_lock); + intel_gpu_freq(dev_priv, rps->efficient_freq)); + mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -1136,10 +1174,17 @@ static int i915_frequency_info(struct seq_file *m, void *unused) pm_iir = I915_READ(GEN8_GT_IIR(2)); pm_mask = I915_READ(GEN6_PMINTRMSK); } + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & 
GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_isr, pm_iir, pm_mask); seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", - dev_priv->rps.pm_intrmsk_mbz); + rps->pm_intrmsk_mbz); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", (gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8); @@ -1159,8 +1204,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup)); seq_printf(m, "RP PREV UP: %d (%dus)\n", rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", - dev_priv->rps.up_threshold); + seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold); seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei)); @@ -1168,8 +1212,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown)); seq_printf(m, "RP PREV DOWN: %d (%dus)\n", rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", - dev_priv->rps.down_threshold); + seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; @@ -1191,22 +1234,22 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + intel_gpu_freq(dev_priv, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1447,21 +1490,11 @@ static void print_rc6_res(struct seq_file *m, static int vlv_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, rcctl1, pw_status; + u32 rcctl1, pw_status; pw_status = I915_READ(VLV_GTLC_PW_STATUS); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "Turbo enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | 
GEN6_RC_CTL_EI_MODE(1)))); @@ -1479,7 +1512,7 @@ static int vlv_drpc_info(struct seq_file *m) static int gen6_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gt_core_status, rcctl1, rc6vids = 0; u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0; @@ -1498,24 +1531,16 @@ static int gen6_drpc_info(struct seq_file *m) gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS); trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); if (INTEL_GEN(dev_priv) >= 9) { gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", @@ -1778,6 +1803,7 @@ static int i915_emon_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; @@ -1789,19 +1815,17 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) goto out; if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = - dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; - max_gpu_freq = - dev_priv->rps.max_freq_softlimit / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq_softlimit; - max_gpu_freq = dev_priv->rps.max_freq_softlimit; + min_gpu_freq = rps->min_freq_softlimit; + max_gpu_freq = rps->max_freq_softlimit; } seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); @@ -1820,7 +1844,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 8) & 0xff) * 100); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: intel_runtime_pm_put(dev_priv); @@ -2254,25 +2278,26 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct drm_file *file; - seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); + seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? 
%d\n", - atomic_read(&dev_priv->rps.num_waiters)); + atomic_read(&rps->num_waiters)); seq_printf(m, "Frequency requested %d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->min_freq), + intel_gpu_freq(dev_priv, rps->min_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq), + intel_gpu_freq(dev_priv, rps->efficient_freq), + intel_gpu_freq(dev_priv, rps->boost_freq)); mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(file, &dev->filelist, lhead) { @@ -2284,15 +2309,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "%s [%d]: %d boosts\n", task ? task->comm : "<unknown>", task ? task->pid : -1, - atomic_read(&file_priv->rps.boosts)); + atomic_read(&file_priv->rps_client.boosts)); rcu_read_unlock(); } seq_printf(m, "Kernel (anonymous) boosts: %d\n", - atomic_read(&dev_priv->rps.boosts)); + atomic_read(&rps->boosts)); mutex_unlock(&dev->filelist_mutex); if (INTEL_GEN(dev_priv) >= 6 && - dev_priv->rps.enabled && + rps->enabled && dev_priv->gt.active_requests) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -2305,13 +2330,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", - rps_power_to_str(dev_priv->rps.power)); + rps_power_to_str(rps->power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", rpup && rpupei ? 100 * rpup / rpupei : 0, - dev_priv->rps.up_threshold); + rps->up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", rpdown && rpdownei ? 
100 * rpdown / rpdownei : 0, - dev_priv->rps.down_threshold); + rps->down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); } @@ -2334,27 +2359,13 @@ static int i915_llc(struct seq_file *m, void *data) static int i915_huc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_printer p; if (!HAS_HUC_UCODE(dev_priv)) return 0; - seq_puts(m, "HuC firmware status:\n"); - seq_printf(m, "\tpath: %s\n", huc_fw->path); - seq_printf(m, "\tfetch: %s\n", - intel_uc_fw_status_repr(huc_fw->fetch_status)); - seq_printf(m, "\tload: %s\n", - intel_uc_fw_status_repr(huc_fw->load_status)); - seq_printf(m, "\tversion wanted: %d.%d\n", - huc_fw->major_ver_wanted, huc_fw->minor_ver_wanted); - seq_printf(m, "\tversion found: %d.%d\n", - huc_fw->major_ver_found, huc_fw->minor_ver_found); - seq_printf(m, "\theader: offset is %d; size = %d\n", - huc_fw->header_offset, huc_fw->header_size); - seq_printf(m, "\tuCode: offset is %d; size = %d\n", - huc_fw->ucode_offset, huc_fw->ucode_size); - seq_printf(m, "\tRSA: offset is %d; size = %d\n", - huc_fw->rsa_offset, huc_fw->rsa_size); + p = drm_seq_file_printer(m); + intel_uc_fw_dump(&dev_priv->huc.fw, &p); intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); @@ -2366,29 +2377,14 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct drm_printer p; u32 tmp, i; if (!HAS_GUC_UCODE(dev_priv)) return 0; - seq_printf(m, "GuC firmware status:\n"); - seq_printf(m, "\tpath: %s\n", - guc_fw->path); - seq_printf(m, "\tfetch: %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status)); - seq_printf(m, "\tload: %s\n", - intel_uc_fw_status_repr(guc_fw->load_status)); - seq_printf(m, "\tversion wanted: %d.%d\n", - guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted); - seq_printf(m, "\tversion found: %d.%d\n", - guc_fw->major_ver_found, guc_fw->minor_ver_found); - seq_printf(m, "\theader: offset is %d; size = %d\n", - guc_fw->header_offset, guc_fw->header_size); - seq_printf(m, "\tuCode: offset is %d; size = %d\n", - guc_fw->ucode_offset, guc_fw->ucode_size); - seq_printf(m, "\tRSA: offset is %d; size = %d\n", - guc_fw->rsa_offset, guc_fw->rsa_size); + p = drm_seq_file_printer(m); + intel_uc_fw_dump(&dev_priv->guc.fw, &p); intel_runtime_pm_get(dev_priv); @@ -3238,9 +3234,9 @@ static int i915_display_info(struct seq_file *m, void *unused) static int i915_engine_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct i915_gpu_error *error = &dev_priv->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; + struct drm_printer p; intel_runtime_pm_get(dev_priv); @@ -3249,151 +3245,21 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); - for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_gem_request *rq; - struct rb_node *rb; - u64 addr; - - seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), - engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - 
engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); - seq_printf(m, "\tReset count: %d\n", - i915_reset_engine_count(error, engine)); - - rcu_read_lock(); - - seq_printf(m, "\tRequests:\n"); - - rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tlast "); - - rq = i915_gem_find_active_request(engine); - if (rq) { - print_request(m, rq, "\t\tactive "); - seq_printf(m, - "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); - } - - seq_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", - I915_READ(RING_START(engine->mmio_base)), - rq ? i915_ggtt_offset(rq->ring->vma) : 0); - seq_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", - I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, - rq ? rq->ring->head : 0); - seq_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", - I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, - rq ? rq->ring->tail : 0); - seq_printf(m, "\tRING_CTL: 0x%08x [%s]\n", - I915_READ(RING_CTL(engine->mmio_base)), - I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); - - rcu_read_unlock(); - - addr = intel_engine_get_active_head(engine); - seq_printf(m, "\tACTHD: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - addr = intel_engine_get_last_batch_head(engine); - seq_printf(m, "\tBBADDR: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - - if (i915_modparams.enable_execlists) { - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - struct intel_engine_execlists * const execlists = &engine->execlists; - u32 ptr, read, write; - unsigned int idx; - - seq_printf(m, "\tExeclist status: 0x%08x %08x\n", - I915_READ(RING_EXECLIST_STATUS_LO(engine)), - I915_READ(RING_EXECLIST_STATUS_HI(engine))); - - ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); - read = GEN8_CSB_READ_PTR(ptr); - write = GEN8_CSB_WRITE_PTR(ptr); - seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? 
%s\n", - read, execlists->csb_head, - write, - intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted))); - if (read >= GEN8_CSB_ENTRIES) - read = 0; - if (write >= GEN8_CSB_ENTRIES) - write = 0; - if (read > write) - write += GEN8_CSB_ENTRIES; - while (read < write) { - idx = ++read % GEN8_CSB_ENTRIES; - seq_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", - idx, - I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), - hws[idx * 2], - I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), - hws[idx * 2 + 1]); - } - - rcu_read_lock(); - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - unsigned int count; - - rq = port_unpack(&execlists->port[idx], &count); - if (rq) { - seq_printf(m, "\t\tELSP[%d] count=%d, ", - idx, count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[%d] idle\n", - idx); - } - } - rcu_read_unlock(); - - spin_lock_irq(&engine->timeline->lock); - for (rb = execlists->first; rb; rb = rb_next(rb)) { - struct i915_priolist *p = - rb_entry(rb, typeof(*p), node); - - list_for_each_entry(rq, &p->requests, - priotree.link) - print_request(m, rq, "\t\tQ "); - } - spin_unlock_irq(&engine->timeline->lock); - } else if (INTEL_GEN(dev_priv) > 6) { - seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE(engine))); - seq_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE_READ(engine))); - seq_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", - I915_READ(RING_PP_DIR_DCLV(engine))); - } + p = drm_seq_file_printer(m); + for_each_engine(engine, dev_priv, id) + intel_engine_dump(engine, &p); - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); + intel_runtime_pm_put(dev_priv); - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); + return 0; +} - seq_puts(m, "\n"); - } +static int i915_shrinker_info(struct seq_file *m, void *unused) +{ + struct drm_i915_private *i915 = node_to_i915(m->private); - intel_runtime_pm_put(dev_priv); + seq_printf(m, "seeks = %d\n", i915->mm.shrinker.seeks); + seq_printf(m, "batch = %lu\n", i915->mm.shrinker.batch); return 0; } @@ -4258,8 +4124,7 @@ fault_irq_set(struct drm_i915_private *i915, mutex_unlock(&i915->drm.struct_mutex); /* Flush idle worker to disarm irq */ - while (flush_delayed_work(&i915->gt.idle_work)) - ; + drain_delayed_work(&i915->gt.idle_work); return 0; @@ -4314,18 +4179,20 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, i915_ring_test_irq_get, i915_ring_test_irq_set, "0x%08llx\n"); -#define DROP_UNBOUND 0x1 -#define DROP_BOUND 0x2 -#define DROP_RETIRE 0x4 -#define DROP_ACTIVE 0x8 -#define DROP_FREED 0x10 -#define DROP_SHRINK_ALL 0x20 +#define DROP_UNBOUND BIT(0) +#define DROP_BOUND BIT(1) +#define DROP_RETIRE BIT(2) +#define DROP_ACTIVE BIT(3) +#define DROP_FREED BIT(4) +#define DROP_SHRINK_ALL BIT(5) +#define DROP_IDLE BIT(6) #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ DROP_ACTIVE | \ DROP_FREED | \ - DROP_SHRINK_ALL) + DROP_SHRINK_ALL |\ + DROP_IDLE) static int i915_drop_caches_get(void *data, u64 *val) { @@ -4341,7 +4208,8 @@ i915_drop_caches_set(void *data, u64 val) struct drm_device *dev = &dev_priv->drm; int ret = 0; - DRM_DEBUG("Dropping caches: 0x%08llx\n", val); + DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", + val, val & DROP_ALL); /* No need to check and wait for gpu resets, 
only libdrm auto-restarts * on ioctls on -EAGAIN. */ @@ -4372,6 +4240,9 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(dev_priv); fs_reclaim_release(GFP_KERNEL); + if (val & DROP_IDLE) + drain_delayed_work(&dev_priv->gt.idle_work); + if (val & DROP_FREED) { synchronize_rcu(); i915_gem_drain_freed_objects(dev_priv); @@ -4392,7 +4263,7 @@ i915_max_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); return 0; } @@ -4400,6 +4271,7 @@ static int i915_max_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4408,7 +4280,7 @@ i915_max_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4417,20 +4289,20 @@ i915_max_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; - if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } @@ -4447,7 +4319,7 @@ i915_min_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); return 0; } @@ -4455,6 +4327,7 @@ static int i915_min_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4463,7 +4336,7 @@ i915_min_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4472,21 +4345,21 @@ i915_min_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; if (val < hw_min || - val > hw_max || val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + val > hw_max || val > rps->max_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } @@ -4838,9 +4711,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_gtt", i915_gem_gtt_info, 0}, - {"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1}, {"i915_gem_stolen", i915_gem_stolen_list_info }, - {"i915_gem_request", 
i915_gem_request_info, 0}, {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, @@ -4878,6 +4749,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_dmc_info", i915_dmc_info, 0}, {"i915_display_info", i915_display_info, 0}, {"i915_engine_info", i915_engine_info, 0}, + {"i915_shrinker_info", i915_shrinker_info, 0}, {"i915_semaphore_status", i915_semaphore_status, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59ac9199b35d..960d3d8b95b8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -367,9 +367,18 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915_gem_mmap_gtt_version(); break; case I915_PARAM_HAS_SCHEDULER: - value = dev_priv->engine[RCS] && - dev_priv->engine[RCS]->schedule; + value = 0; + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { + value |= I915_SCHEDULER_CAP_ENABLED; + value |= I915_SCHEDULER_CAP_PRIORITY; + + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption && + i915_modparams.enable_execlists && + !i915_modparams.enable_guc_submission) + value |= I915_SCHEDULER_CAP_PREEMPTION; + } break; + case I915_PARAM_MMAP_VERSION: /* Remember to bump this if the version changes! */ case I915_PARAM_HAS_GEM: @@ -606,9 +615,10 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); - i915_gem_cleanup_userptr(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_cleanup_userptr(dev_priv); + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!list_empty(&dev_priv->contexts.list)); @@ -1007,6 +1017,8 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) intel_uncore_init(dev_priv); + intel_uc_init_mmio(dev_priv); + ret = intel_engines_init_mmio(dev_priv); if (ret) goto err_uncore; @@ -1313,7 +1325,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) * becaue the HDA driver may require us to enable the audio power * domain during system suspend. 
*/ - pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); ret = i915_driver_init_early(dev_priv, ent); if (ret < 0) @@ -1580,7 +1592,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) intel_display_set_init_power(dev_priv, false); - fw_csr = !IS_GEN9_LP(dev_priv) && + fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* * In case of firmware assisted context save/restore don't manually @@ -2070,11 +2082,14 @@ static int i915_pm_resume(struct device *kdev) /* freeze: before creating the hibernation_image */ static int i915_pm_freeze(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend(dev); + if (ret) + return ret; + } ret = i915_gem_freeze(kdev_to_i915(kdev)); if (ret) @@ -2085,11 +2100,14 @@ static int i915_pm_freeze(struct device *kdev) static int i915_pm_freeze_late(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend_late(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend_late(dev, true); + if (ret) + return ret; + } ret = i915_gem_freeze_late(kdev_to_i915(kdev)); if (ret) @@ -2485,7 +2503,7 @@ static int intel_runtime_suspend(struct device *kdev) struct drm_i915_private *dev_priv = to_i915(dev); int ret; - if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6()))) + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && intel_rc6_enabled()))) return -ENODEV; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) @@ -2527,12 +2545,12 @@ static int intel_runtime_suspend(struct device *kdev) intel_uncore_suspend(dev_priv); enable_rpm_wakeref_asserts(dev_priv); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv)) DRM_ERROR("Unclaimed access detected prior to suspending\n"); - dev_priv->pm.suspended = true; + dev_priv->runtime_pm.suspended = true; /* * FIXME: We really should find a document that references the arguments @@ -2578,11 +2596,11 @@ static int intel_runtime_resume(struct device *kdev) DRM_DEBUG_KMS("Resuming device\n"); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); disable_rpm_wakeref_asserts(dev_priv); intel_opregion_notify_adapter(dev_priv, PCI_D0); - dev_priv->pm.suspended = false; + dev_priv->runtime_pm.suspended = false; if (intel_uncore_unclaimed_mmio(dev_priv)) DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7ca11318ac69..54b5d4c582b6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170929" -#define DRIVER_TIMESTAMP 1506682238 +#define DRIVER_DATE "20171023" +#define DRIVER_TIMESTAMP 1508748913 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -609,7 +609,7 @@ struct drm_i915_file_private { struct intel_rps_client { atomic_t boosts; - } rps; + } rps_client; unsigned int bsd_engine; @@ -783,8 +783,8 @@ struct intel_csr { 
func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ + func(has_logical_ring_preemption); \ func(has_overlay); \ - func(has_pipe_cxsr); \ func(has_pooled_eu); \ func(has_psr); \ func(has_rc6); \ @@ -868,6 +868,8 @@ struct intel_device_info { u8 num_sprites[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES]; + unsigned int page_sizes; /* page sizes supported by the HW */ + #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG @@ -981,6 +983,7 @@ struct i915_gpu_state { pid_t pid; u32 handle; u32 hw_id; + int priority; int ban_score; int active; int guilty; @@ -1003,6 +1006,7 @@ struct i915_gpu_state { long jiffies; pid_t pid; u32 context; + int priority; int ban_score; u32 seqno; u32 head; @@ -1103,6 +1107,16 @@ struct intel_fbc { int src_w; int src_h; bool visible; + /* + * Display surface base address adjustement for + * pageflips. Note that on gen4+ this only adjusts up + * to a tile, offsets within a tile are handled in + * the hw itself (with the TILEOFF register). + */ + int adjusted_x; + int adjusted_y; + + int y; } plane; struct { @@ -1312,7 +1326,7 @@ struct intel_rps_ei { u32 media_c0; }; -struct intel_gen6_power_mgmt { +struct intel_rps { /* * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock @@ -1353,20 +1367,26 @@ struct intel_gen6_power_mgmt { enum { LOW_POWER, BETWEEN, HIGH_POWER } power; bool enabled; - struct delayed_work autoenable_work; atomic_t num_waiters; atomic_t boosts; /* manual wa residency calculations */ struct intel_rps_ei ei; +}; - /* - * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. Note that - * this lock may be held for long periods of time when - * talking to hw - so only take it when talking to hw! - */ - struct mutex hw_lock; +struct intel_rc6 { + bool enabled; +}; + +struct intel_llc_pstate { + bool enabled; +}; + +struct intel_gen6_power_mgmt { + struct intel_rps rps; + struct intel_rc6 rc6; + struct intel_llc_pstate llc_pstate; + struct delayed_work autoenable_work; }; /* defined intel_pm.c */ @@ -1479,6 +1499,9 @@ struct i915_gem_mm { * always the inner lock when overlapping with struct_mutex. */ struct mutex stolen_lock; + /* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */ + spinlock_t obj_lock; + /** List of all objects in gtt_space. Used to restore gtt * mappings on resume */ struct list_head bound_list; @@ -1499,6 +1522,7 @@ struct i915_gem_mm { */ struct llist_head free_list; struct work_struct free_work; + spinlock_t free_lock; /** * Small stash of WC pages @@ -1508,6 +1532,11 @@ struct i915_gem_mm { /** Usable portion of the GTT for GEM */ dma_addr_t stolen_base; /* limited to low memory (32-bit) */ + /** + * tmpfs instance used for shmem backed objects + */ + struct vfsmount *gemfs; + /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; @@ -1749,6 +1778,8 @@ struct intel_vbt_data { u16 panel_id; struct mipi_config *config; struct mipi_pps_data *pps; + u16 bl_ports; + u16 cabc_ports; u8 seq_version; u32 size; u8 *data; @@ -1944,13 +1975,7 @@ struct i915_wa_reg { u32 mask; }; -/* - * RING_MAX_NONPRIV_SLOTS is per-engine but at this point we are only - * allowing it for RCS as we don't foresee any requirement of having - * a whitelist for other engines. When it is really required for - * other engines then the limit need to be increased. 
- */ -#define I915_MAX_WA_REGS (16 + RING_MAX_NONPRIV_SLOTS) +#define I915_MAX_WA_REGS 16 struct i915_workarounds { struct i915_wa_reg reg[I915_MAX_WA_REGS]; @@ -2251,8 +2276,11 @@ struct drm_i915_private { wait_queue_head_t gmbus_wait_queue; struct pci_dev *bridge_dev; - struct i915_gem_context *kernel_context; struct intel_engine_cs *engine[I915_NUM_ENGINES]; + /* Context used internally to idle the GPU and setup initial state */ + struct i915_gem_context *kernel_context; + /* Context only to be used for injecting preemption commands */ + struct i915_gem_context *preempt_context; struct i915_vma *semaphore; struct drm_dma_handle *status_page_dmah; @@ -2408,8 +2436,16 @@ struct drm_i915_private { /* Cannot be determined by PCIID. You must always read a register. */ u32 edram_cap; - /* gen6+ rps state */ - struct intel_gen6_power_mgmt rps; + /* + * Protects RPS/RC6 register access and PCU communication. + * Must be taken after struct_mutex if nested. Note that + * this lock may be held for long periods of time when + * talking to hw - so only take it when talking to hw! + */ + struct mutex pcu_lock; + + /* gen6+ GT PM state */ + struct intel_gen6_power_mgmt gt_pm; /* ilk-only ips/rps state. Everything in here is protected by the global * mchdev_lock in intel_pm.c */ @@ -2520,7 +2556,7 @@ struct drm_i915_private { bool distrust_bios_wm; } wm; - struct i915_runtime_pm pm; + struct i915_runtime_pm runtime_pm; struct { bool initialized; @@ -2859,6 +2895,21 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \ (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0) +static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg) +{ + unsigned int page_sizes; + + page_sizes = 0; + while (sg) { + GEM_BUG_ON(sg->offset); + GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE)); + page_sizes |= sg->length; + sg = __sg_next(sg); + } + + return page_sizes; +} + static inline unsigned int i915_sg_segment_size(void) { unsigned int size = swiotlb_max_segment(); @@ -3035,6 +3086,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define CNL_REVID_A0 0x0 #define CNL_REVID_B0 0x1 +#define CNL_REVID_C0 0x2 #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) @@ -3088,6 +3140,10 @@ intel_info(const struct drm_i915_private *dev_priv) #define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) #define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) #define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) +#define HAS_PAGE_SIZES(dev_priv, sizes) ({ \ + GEM_BUG_ON((sizes) == 0); \ + ((sizes) & ~(dev_priv)->info.page_sizes) == 0; \ +}) #define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ @@ -3122,7 +3178,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) -#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) #define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) #define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7) @@ -3504,7 +3559,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages); + struct sg_table *pages, + unsigned int sg_page_sizes); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int 
__must_check @@ -3518,10 +3574,16 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) return __i915_gem_object_get_pages(obj); } +static inline bool +i915_gem_object_has_pages(struct drm_i915_gem_object *obj) +{ + return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages)); +} + static inline void __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); atomic_inc(&obj->mm.pages_pin_count); } @@ -3535,8 +3597,8 @@ i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj) static inline void __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(!obj->mm.pages); atomic_dec(&obj->mm.pages_pin_count); } @@ -3726,8 +3788,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) } /* i915_gem_fence_reg.c */ -int __must_check i915_vma_get_fence(struct i915_vma *vma); -int __must_check i915_vma_put_fence(struct i915_vma *vma); struct drm_i915_fence_reg * i915_reserve_fence(struct drm_i915_private *dev_priv); void i915_unreserve_fence(struct drm_i915_fence_reg *fence); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73eeb6b1f1cd..3a140eedfc83 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include "i915_gemfs.h" #include <linux/dma-fence-array.h> #include <linux/kthread.h> #include <linux/reservation.h> @@ -55,7 +56,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_display; + return obj->pin_global; /* currently in use by HW, keep flushed */ } static int @@ -161,8 +162,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } -static struct sg_table * -i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; drm_dma_handle_t *phys; @@ -170,9 +170,10 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) struct scatterlist *sg; char *vaddr; int i; + int err; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Always aligning to the object size, allows a single allocation * to handle all possible callers, and given typical object sizes, @@ -182,7 +183,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) roundup_pow_of_two(obj->base.size), roundup_pow_of_two(obj->base.size)); if (!phys) - return ERR_PTR(-ENOMEM); + return -ENOMEM; vaddr = phys->vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { @@ -191,7 +192,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) { - st = ERR_CAST(page); + err = PTR_ERR(page); goto err_phys; } @@ -208,13 +209,13 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } if (sg_alloc_table(st, 1, GFP_KERNEL)) { kfree(st); - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } @@ -226,11 +227,15 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; obj->phys_handle = phys; - return st; + + 
__i915_gem_object_set_pages(obj, st, sg->length); + + return 0; err_phys: drm_pci_free(obj->base.dev, phys); - return st; + + return err; } static void __start_cpu_write(struct drm_i915_gem_object *obj) @@ -353,7 +358,7 @@ static long i915_gem_object_wait_fence(struct dma_fence *fence, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { struct drm_i915_gem_request *rq; @@ -386,11 +391,11 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (rps) { + if (rps_client) { if (INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq, rps); + gen6_rps_boost(rq, rps_client); else - rps = NULL; + rps_client = NULL; } timeout = i915_wait_request(rq, flags, timeout); @@ -406,7 +411,7 @@ static long i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; @@ -425,7 +430,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (i = 0; i < count; i++) { timeout = i915_gem_object_wait_fence(shared[i], flags, timeout, - rps); + rps_client); if (timeout < 0) break; @@ -442,7 +447,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, } if (excl && timeout >= 0) { - timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + timeout = i915_gem_object_wait_fence(excl, flags, timeout, + rps_client); prune_fences = timeout >= 0; } @@ -538,7 +544,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) @@ -550,7 +556,7 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout, - rps); + rps_client); return timeout < 0 ? timeout : 0; } @@ -558,7 +564,7 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file) { struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; + return &fpriv->rps_client; } static int @@ -1050,7 +1056,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1232,9 +1240,27 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, if (ret) return ret; - intel_runtime_pm_get(i915); + if (i915_gem_object_has_struct_page(obj)) { + /* + * Avoid waking the device up if we can fallback, as + * waking/resuming is very slow (worst-case 10-100 ms + * depending on PCI sleeps and our own resume time). + * This easily dwarfs any performance advantage from + * using the cache bypass of indirect GGTT access. 
+ */ + if (!intel_runtime_pm_get_if_in_use(i915)) { + ret = -EFAULT; + goto out_unlock; + } + } else { + /* No backing pages, no fallback, we must force GGTT access */ + intel_runtime_pm_get(i915); + } + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1247,7 +1273,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; + goto out_rpm; GEM_BUG_ON(!node.allocated); } @@ -1310,8 +1336,9 @@ out_unpin: } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(i915); +out_unlock: mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -1527,6 +1554,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct list_head *list; struct i915_vma *vma; + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!i915_vma_is_ggtt(vma)) break; @@ -1541,8 +1570,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) } i915 = to_i915(obj->base.dev); + spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; - list_move_tail(&obj->global_link, list); + list_move_tail(&obj->mm.link, list); + spin_unlock(&i915->mm.obj_lock); } /** @@ -1905,22 +1936,27 @@ int i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_unpin; - ret = i915_vma_get_fence(vma); + ret = i915_vma_pin_fence(vma); if (ret) goto err_unpin; - /* Mark as being mmapped into userspace for later revocation */ - assert_rpm_wakelock_held(dev_priv); - if (list_empty(&obj->userfault_link)) - list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); - /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); + if (ret) + goto err_fence; + + /* Mark as being mmapped into userspace for later revocation */ + assert_rpm_wakelock_held(dev_priv); + if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); + GEM_BUG_ON(!obj->userfault_count); +err_fence: + i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); err_unlock: @@ -1972,6 +2008,25 @@ err: return ret; } +static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + GEM_BUG_ON(!obj->userfault_count); + + obj->userfault_count = 0; + list_del(&obj->userfault_link); + drm_vma_node_unmap(&obj->base.vma_node, + obj->base.dev->anon_inode->i_mapping); + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + + i915_vma_unset_userfault(vma); + } +} + /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question @@ -2002,12 +2057,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) lockdep_assert_held(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - if (list_empty(&obj->userfault_link)) + if (!obj->userfault_count) goto out; - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); + __i915_gem_object_release_mmap(obj); /* Ensure that the CPU's PTE are revoked and there are not outstanding * memory 
transactions from userspace before we return. The TLB @@ -2035,11 +2088,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) */ list_for_each_entry_safe(obj, on, - &dev_priv->mm.userfault_list, userfault_link) { - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - } + &dev_priv->mm.userfault_list, userfault_link) + __i915_gem_object_release_mmap(obj); /* The fence will be lost when the device powers down. If any were * in use by hardware (i.e. they are pinned), we should not be powering @@ -2062,7 +2112,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) if (!reg->vma) continue; - GEM_BUG_ON(!list_empty(®->vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); reg->dirty = true; } } @@ -2167,7 +2217,7 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) struct address_space *mapping; lockdep_assert_held(&obj->mm.lock); - GEM_BUG_ON(obj->mm.pages); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); switch (obj->mm.madv) { case I915_MADV_DONTNEED: @@ -2217,20 +2267,23 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) struct radix_tree_iter iter; void __rcu **slot; + rcu_read_lock(); radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) radix_tree_delete(&obj->mm.get_page.radix, iter.index); + rcu_read_unlock(); } void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; if (i915_gem_object_has_pinned_pages(obj)) return; GEM_BUG_ON(obj->bind_count); - if (!READ_ONCE(obj->mm.pages)) + if (!i915_gem_object_has_pages(obj)) return; /* May be called by shrinker from within get_pages() (on another bo) */ @@ -2244,6 +2297,10 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, pages = fetch_and_zero(&obj->mm.pages); GEM_BUG_ON(!pages); + spin_lock(&i915->mm.obj_lock); + list_del(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + if (obj->mm.mapping) { void *ptr; @@ -2261,6 +2318,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + unlock: mutex_unlock(&obj->mm.lock); } @@ -2291,8 +2350,7 @@ static bool i915_sg_trim(struct sg_table *orig_st) return true; } -static struct sg_table * -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); const unsigned long page_count = obj->base.size / PAGE_SIZE; @@ -2304,6 +2362,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); + unsigned int sg_page_sizes; gfp_t noreclaim; int ret; @@ -2316,12 +2375,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rebuild_st: if (sg_alloc_table(st, page_count, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } /* Get the list of pages out of our struct file. 
They'll be pinned @@ -2335,6 +2394,7 @@ rebuild_st: sg = st->sgl; st->nents = 0; + sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, @@ -2387,8 +2447,10 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) + if (i) { + sg_page_sizes |= sg->length; sg = sg_next(sg); + } st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -2399,8 +2461,10 @@ rebuild_st: /* Check that the i965g/gm workaround works. */ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } - if (sg) /* loop terminated early; short sg table */ + if (sg) { /* loop terminated early; short sg table */ + sg_page_sizes |= sg->length; sg_mark_end(sg); + } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); @@ -2429,7 +2493,9 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - return st; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; err_sg: sg_mark_end(sg); @@ -2450,12 +2516,17 @@ err_pages: if (ret == -ENOSPC) ret = -ENOMEM; - return ERR_PTR(ret); + return ret; } void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *pages, + unsigned int sg_page_sizes) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned long supported = INTEL_INFO(i915)->page_sizes; + int i; + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; @@ -2464,28 +2535,48 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.pages = pages; if (i915_gem_object_is_tiled(obj) && - to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { GEM_BUG_ON(obj->mm.quirked); __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } + + GEM_BUG_ON(!sg_page_sizes); + obj->mm.page_sizes.phys = sg_page_sizes; + + /* + * Calculate the supported page-sizes which fit into the given + * sg_page_sizes. This will give us the page-sizes which we may be able + * to use opportunistically when later inserting into the GTT. For + * example if phys=2G, then in theory we should be able to use 1G, 2M, + * 64K or 4K pages, although in practice this will depend on a number of + * other factors. 
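The loop that closes this comment block keeps a supported page size only when the backing store contains at least one physical segment of that size or larger. A hedged, standalone rendering of that bit manipulation (plain C; the SZ_* constants and "supported" stand in for the I915_GTT_PAGE_SIZE_* masks and INTEL_INFO()->page_sizes — illustration only, not part of the patch):

        #include <stdio.h>

        #define SZ_4K  (1u << 12)   /* stand-in for I915_GTT_PAGE_SIZE_4K  */
        #define SZ_64K (1u << 16)   /* stand-in for I915_GTT_PAGE_SIZE_64K */
        #define SZ_2M  (1u << 21)   /* stand-in for I915_GTT_PAGE_SIZE_2M  */

        /* Keep a supported size only if at least one physical segment is that
         * large or larger, mirroring the for_each_set_bit() loop in the hunk. */
        static unsigned int derive_sg_sizes(unsigned int supported, unsigned int phys)
        {
                unsigned int sg = 0;
                unsigned int bit;

                for (bit = 12; bit <= 21; bit++) {
                        if (!(supported & (1u << bit)))
                                continue;
                        if (phys & (~0u << bit))
                                sg |= 1u << bit;
                }
                return sg;
        }

        int main(void)
        {
                /* Platform supporting 4K/64K/2M, object whose largest segment is
                 * 64K: only the 4K and 64K bits survive (0x11000). */
                printf("%#x\n", derive_sg_sizes(SZ_4K | SZ_64K | SZ_2M, SZ_64K));
                return 0;
        }

With physical segments reaching 2M-aligned sizes the 2M bit would survive as well, which is what later lets gen8_ppgtt_insert_huge_entries() pick 2M or 64K GTT entries opportunistically.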
+ */ + obj->mm.page_sizes.sg = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + if (obj->mm.page_sizes.phys & ~0u << i) + obj->mm.page_sizes.sg |= BIT(i); + } + GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); + + spin_lock(&i915->mm.obj_lock); + list_add(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { - struct sg_table *pages; + int err; if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); return -EFAULT; } - pages = obj->ops->get_pages(obj); - if (unlikely(IS_ERR(pages))) - return PTR_ERR(pages); + err = obj->ops->get_pages(obj); + GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages)); - __i915_gem_object_set_pages(obj, pages); - return 0; + return err; } /* Ensure that the associated pages are gathered from the backing storage @@ -2503,7 +2594,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { + if (unlikely(!i915_gem_object_has_pages(obj))) { GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); @@ -2588,7 +2679,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, type &= ~I915_MAP_OVERRIDE; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { + if (unlikely(!i915_gem_object_has_pages(obj))) { GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); ret = ____i915_gem_object_get_pages(obj); @@ -2600,7 +2691,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, atomic_inc(&obj->mm.pages_pin_count); pinned = false; } - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { @@ -2655,9 +2746,12 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, * allows it to avoid the cost of retrieving a page (either swapin * or clearing-before-use) before it is overwritten. */ - if (READ_ONCE(obj->mm.pages)) + if (i915_gem_object_has_pages(obj)) return -ENODEV; + if (obj->mm.madv != I915_MADV_WILLNEED) + return -EFAULT; + /* Before the pages are instantiated the object is treated as being * in the CPU domain. The pages will be clflushed as required before * use, and we can freely write into the pages directly. If userspace @@ -2796,7 +2890,17 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request = NULL; - /* Prevent the signaler thread from updating the request + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + /* + * Prevent the signaler thread from updating the request * state (by calling dma_fence_signal) as we are processing * the reset. 
The write from the GPU of the seqno is * asynchronous and the signaler thread may see a different @@ -2807,7 +2911,8 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) */ kthread_park(engine->breadcrumbs.signaler); - /* Prevent request submission to the hardware until we have + /* + * Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request * to a second via its engine->irq_tasklet *just* as we are @@ -2997,6 +3102,8 @@ void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { tasklet_enable(&engine->execlists.irq_tasklet); kthread_unpark(engine->breadcrumbs.signaler); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) @@ -3014,51 +3121,76 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct drm_i915_gem_request *request) { - GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); + i915_gem_request_submit(request); - intel_engine_init_global_seqno(request->engine, request->global_seqno); } -static void engine_set_wedged(struct intel_engine_cs *engine) +static void nop_complete_submit_request(struct drm_i915_gem_request *request) { - /* We need to be sure that no thread is running the old callback as - * we install the nop handler (otherwise we would submit a request - * to hardware that will never complete). In order to prevent this - * race, we wait until the machine is idle before making the swap - * (using stop_machine()). - */ - engine->submit_request = nop_submit_request; + unsigned long flags; - /* Mark all executing requests as skipped */ - engine->cancel_requests(engine); + dma_fence_set_error(&request->fence, -EIO); - /* Mark all pending requests as complete so that any concurrent - * (lockless) lookup doesn't try and wait upon the request as we - * reset it. - */ - intel_engine_init_global_seqno(engine, - intel_engine_last_submit(engine)); + spin_lock_irqsave(&request->engine->timeline->lock, flags); + __i915_gem_request_submit(request); + intel_engine_init_global_seqno(request->engine, request->global_seqno); + spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } -static int __i915_gem_set_wedged_BKL(void *data) +void i915_gem_set_wedged(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). + */ for_each_engine(engine, i915, id) - engine_set_wedged(engine); + engine->submit_request = nop_submit_request; - set_bit(I915_WEDGED, &i915->gpu_error.flags); - wake_up_all(&i915->gpu_error.reset_queue); + /* + * Make sure no one is running the old callback before we proceed with + * cancelling requests and resetting the completion tracking. Otherwise + * we might submit a request to the hardware which never completes. 
+ */ + synchronize_rcu(); - return 0; -} + for_each_engine(engine, i915, id) { + /* Mark all executing requests as skipped */ + engine->cancel_requests(engine); -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) -{ - stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); + /* + * Only once we've force-cancelled all in-flight requests can we + * start to complete all requests. + */ + engine->submit_request = nop_complete_submit_request; + } + + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_init_global_seqno, or the one + * in nop_complete_submit_request. + */ + synchronize_rcu(); + + for_each_engine(engine, i915, id) { + unsigned long flags; + + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + spin_lock_irqsave(&engine->timeline->lock, flags); + intel_engine_init_global_seqno(engine, + intel_engine_last_submit(engine)); + spin_unlock_irqrestore(&engine->timeline->lock, flags); + } + + set_bit(I915_WEDGED, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) @@ -3395,7 +3527,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_display)) + if (!READ_ONCE(obj->pin_global)) return; mutex_lock(&obj->base.dev->struct_mutex); @@ -3762,10 +3894,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - /* Mark the pin_display early so that we account for the + /* Mark the global pin early so that we account for the * display coherency whilst setting up the cache domains. */ - obj->pin_display++; + obj->pin_global++; /* The display engine is not coherent with the LLC cache on gen6. 
As * a result, we make sure that the pinning that is about to occur is @@ -3781,7 +3913,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, I915_CACHE_WT : I915_CACHE_NONE); if (ret) { vma = ERR_PTR(ret); - goto err_unpin_display; + goto err_unpin_global; } /* As the user may map the buffer once pinned in the display plane @@ -3812,7 +3944,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); } if (IS_ERR(vma)) - goto err_unpin_display; + goto err_unpin_global; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); @@ -3827,8 +3959,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return vma; -err_unpin_display: - obj->pin_display--; +err_unpin_global: + obj->pin_global--; return vma; } @@ -3837,10 +3969,10 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(vma->obj->pin_display == 0)) + if (WARN_ON(vma->obj->pin_global == 0)) return; - if (--vma->obj->pin_display == 0) + if (--vma->obj->pin_global == 0) vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ @@ -3959,42 +4091,47 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); + if (!view && flags & PIN_MAPPABLE) { + /* If the required space is larger than the available + * aperture, we will not able to find a slot for the + * object and unbinding the object now will be in + * vain. Worse, doing so may cause us to ping-pong + * the object in and out of the Global GTT and + * waste a lot of cycles under the mutex. + */ + if (obj->base.size > dev_priv->ggtt.mappable_end) + return ERR_PTR(-E2BIG); + + /* If NONBLOCK is set the caller is optimistically + * trying to cache the full object within the mappable + * aperture, and *must* have a fallback in place for + * situations where we cannot bind the object. We + * can be a little more lax here and use the fallback + * more often to avoid costly migrations of ourselves + * and other objects within the aperture. + * + * Half-the-aperture is used as a simple heuristic. + * More interesting would to do search for a free + * block prior to making the commitment to unbind. + * That caters for the self-harm case, and with a + * little more heuristics (e.g. NOFAULT, NOEVICT) + * we could try to minimise harm to others. + */ + if (flags & PIN_NONBLOCK && + obj->base.size > dev_priv->ggtt.mappable_end / 2) + return ERR_PTR(-ENOSPC); + } + vma = i915_vma_instance(obj, vm, view); if (unlikely(IS_ERR(vma))) return vma; if (i915_vma_misplaced(vma, size, alignment, flags)) { - if (flags & PIN_NONBLOCK && - (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) - return ERR_PTR(-ENOSPC); + if (flags & PIN_NONBLOCK) { + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) + return ERR_PTR(-ENOSPC); - if (flags & PIN_MAPPABLE) { - /* If the required space is larger than the available - * aperture, we will not able to find a slot for the - * object and unbinding the object now will be in - * vain. Worse, doing so may cause us to ping-pong - * the object in and out of the Global GTT and - * waste a lot of cycles under the mutex. 
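The check hoisted in front of i915_vma_instance() above encodes two cheap early-outs for mappable pins. Roughly, and with a made-up aperture size standing in for dev_priv->ggtt.mappable_end, the decision looks like this sketch (not part of the patch):

        #include <errno.h>
        #include <stdbool.h>
        #include <stdio.h>

        #define MAPPABLE_END (256ull << 20)     /* illustrative 256 MiB aperture */

        /* 0 = try the pin; -E2BIG = can never fit; -ENOSPC = likely to thrash,
         * so a PIN_NONBLOCK caller should use its fallback path instead. */
        static int mappable_pin_check(unsigned long long size, bool nonblock)
        {
                if (size > MAPPABLE_END)
                        return -E2BIG;
                if (nonblock && size > MAPPABLE_END / 2)
                        return -ENOSPC;
                return 0;
        }

        int main(void)
        {
                printf("%d %d %d\n",
                       mappable_pin_check(64ull << 20, true),    /* 0       */
                       mappable_pin_check(200ull << 20, true),   /* -ENOSPC */
                       mappable_pin_check(512ull << 20, false)); /* -E2BIG  */
                return 0;
        }

Half-the-aperture is only a heuristic, as the original comment says; an object between half and full aperture still pins normally when PIN_NONBLOCK is not set.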
- */ - if (vma->fence_size > dev_priv->ggtt.mappable_end) - return ERR_PTR(-E2BIG); - - /* If NONBLOCK is set the caller is optimistically - * trying to cache the full object within the mappable - * aperture, and *must* have a fallback in place for - * situations where we cannot bind the object. We - * can be a little more lax here and use the fallback - * more often to avoid costly migrations of ourselves - * and other objects within the aperture. - * - * Half-the-aperture is used as a simple heuristic. - * More interesting would to do search for a free - * block prior to making the commitment to unbind. - * That caters for the self-harm case, and with a - * little more heuristics (e.g. NOFAULT, NOEVICT) - * we could try to minimise harm to others. - */ - if (flags & PIN_NONBLOCK && + if (flags & PIN_MAPPABLE && vma->fence_size > dev_priv->ggtt.mappable_end / 2) return ERR_PTR(-ENOSPC); } @@ -4175,7 +4312,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (err) goto out; - if (obj->mm.pages && + if (i915_gem_object_has_pages(obj) && i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (obj->mm.madv == I915_MADV_WILLNEED) { @@ -4194,7 +4331,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->mm.madv = args->madv; /* if the object is no longer attached, discard its backing storage */ - if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages) + if (obj->mm.madv == I915_MADV_DONTNEED && + !i915_gem_object_has_pages(obj)) i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; @@ -4220,8 +4358,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->global_link); - INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4251,6 +4387,30 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .pwrite = i915_gem_object_pwrite_gtt, }; +static int i915_gem_object_create_shmem(struct drm_device *dev, + struct drm_gem_object *obj, + size_t size) +{ + struct drm_i915_private *i915 = to_i915(dev); + unsigned long flags = VM_NORESERVE; + struct file *filp; + + drm_gem_private_object_init(dev, obj, size); + + if (i915->mm.gemfs) + filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, + flags); + else + filp = shmem_file_setup("i915", size, flags); + + if (IS_ERR(filp)) + return PTR_ERR(filp); + + obj->filp = filp; + + return 0; +} + struct drm_i915_gem_object * i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) { @@ -4275,7 +4435,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) if (obj == NULL) return ERR_PTR(-ENOMEM); - ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); + ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); if (ret) goto fail; @@ -4352,13 +4512,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, { struct drm_i915_gem_object *obj, *on; - mutex_lock(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - llist_for_each_entry(obj, freed, freed) { + llist_for_each_entry_safe(obj, on, freed, freed) { struct i915_vma *vma, *vn; trace_i915_gem_object_destroy(obj); + mutex_lock(&i915->drm.struct_mutex); + GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) { @@ -4369,15 +4530,22 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(!list_empty(&obj->vma_list)); 
GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); - list_del(&obj->global_link); - } - intel_runtime_pm_put(i915); - mutex_unlock(&i915->drm.struct_mutex); + /* This serializes freeing with the shrinker. Since the free + * is delayed, first by RCU then by the workqueue, we want the + * shrinker to be able to free pages of unreferenced objects, + * or else we may oom whilst there are plenty of deferred + * freed objects. + */ + if (i915_gem_object_has_pages(obj)) { + spin_lock(&i915->mm.obj_lock); + list_del_init(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + } - cond_resched(); + mutex_unlock(&i915->drm.struct_mutex); - llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); + GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); GEM_BUG_ON(!list_empty(&obj->lut_list)); @@ -4387,7 +4555,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) atomic_set(&obj->mm.pages_pin_count, 0); __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - GEM_BUG_ON(obj->mm.pages); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); @@ -4398,16 +4566,29 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, kfree(obj->bit_17); i915_gem_object_free(obj); + + if (on) + cond_resched(); } + intel_runtime_pm_put(i915); } static void i915_gem_flush_free_objects(struct drm_i915_private *i915) { struct llist_node *freed; - freed = llist_del_all(&i915->mm.free_list); - if (unlikely(freed)) + /* Free the oldest, most stale object to keep the free_list short */ + freed = NULL; + if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ + /* Only one consumer of llist_del_first() allowed */ + spin_lock(&i915->mm.free_lock); + freed = llist_del_first(&i915->mm.free_list); + spin_unlock(&i915->mm.free_lock); + } + if (unlikely(freed)) { + freed->next = NULL; __i915_gem_free_objects(i915, freed); + } } static void __i915_gem_free_work(struct work_struct *work) @@ -4424,11 +4605,17 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ + spin_lock(&i915->mm.free_lock); while ((freed = llist_del_all(&i915->mm.free_list))) { + spin_unlock(&i915->mm.free_lock); + __i915_gem_free_objects(i915, freed); if (need_resched()) - break; + return; + + spin_lock(&i915->mm.free_lock); } + spin_unlock(&i915->mm.free_lock); } static void __i915_gem_free_object_rcu(struct rcu_head *head) @@ -4547,8 +4734,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) /* As the idle_work is rearming if it detects a race, play safe and * repeat the flush until it is definitely idle. */ - while (flush_delayed_work(&dev_priv->gt.idle_work)) - ; + drain_delayed_work(&dev_priv->gt.idle_work); /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. @@ -4595,6 +4781,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv) mutex_lock(&dev->struct_mutex); i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(dev_priv); /* As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. 
So let's just reset @@ -4708,6 +4895,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) init_unused_rings(dev_priv); BUG_ON(!dev_priv->kernel_context); + if (i915_terminally_wedged(&dev_priv->gpu_error)) { + ret = -EIO; + goto out; + } ret = i915_ppgtt_init_hw(dev_priv); if (ret) { @@ -4757,6 +4948,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + /* + * We need to fallback to 4K pages since gvt gtt handling doesn't + * support huge page entries - we will need to check either hypervisor + * mm can support huge guest page or just do emulation in gvt. + */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->page_sizes = + I915_GTT_PAGE_SIZE_4K; + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915_modparams.enable_execlists) { @@ -4797,8 +4997,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ - DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); - i915_gem_set_wedged(dev_priv); + if (!i915_terminally_wedged(&dev_priv->gpu_error)) { + DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); + i915_gem_set_wedged(dev_priv); + } ret = 0; } @@ -4898,11 +5100,15 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) goto err_priorities; INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); + + spin_lock_init(&dev_priv->mm.obj_lock); + spin_lock_init(&dev_priv->mm.free_lock); init_llist_head(&dev_priv->mm.free_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); INIT_LIST_HEAD(&dev_priv->mm.userfault_list); + INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->gt.idle_work, @@ -4914,6 +5120,10 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + err = i915_gemfs_init(dev_priv); + if (err) + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); + return 0; err_priorities: @@ -4952,6 +5162,8 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ rcu_barrier(); + + i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) @@ -4990,12 +5202,12 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); i915_gem_drain_freed_objects(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); + spin_lock(&dev_priv->mm.obj_lock); for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, global_link) + list_for_each_entry(obj, *p, mm.link) __start_cpu_write(obj); } - mutex_unlock(&dev_priv->drm.struct_mutex); + spin_unlock(&dev_priv->mm.obj_lock); return 0; } @@ -5314,7 +5526,17 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) goto err_unlock; } - pages = obj->mm.pages; + pages = fetch_and_zero(&obj->mm.pages); + if (pages) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + __i915_gem_object_reset_page_iter(obj); + + spin_lock(&i915->mm.obj_lock); + list_del(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + } + obj->ops = &i915_gem_phys_ops; err = ____i915_gem_object_get_pages(obj); @@ -5341,6 +5563,7 @@ err_unlock: #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" #include 
"selftests/huge_gem_object.c" +#include "selftests/huge_pages.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index 8a04d33055be..f663cd919795 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -70,6 +70,7 @@ static const struct dma_fence_ops i915_clflush_ops = { static void __i915_do_clflush(struct drm_i915_gem_object *obj) { + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); intel_fb_obj_flush(obj, ORIGIN_CPU); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 921ee369c74d..f782cf2069c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -104,17 +104,14 @@ static void lut_close(struct i915_gem_context *ctx) kmem_cache_free(ctx->i915->luts, lut); } + rcu_read_lock(); radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { struct i915_vma *vma = rcu_dereference_raw(*slot); - struct drm_i915_gem_object *obj = vma->obj; radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); + __i915_gem_object_release_unless_active(vma->obj); } + rcu_read_unlock(); } static void i915_gem_context_free(struct i915_gem_context *ctx) @@ -198,6 +195,11 @@ static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); + /* + * The LUT uses the VMA as a backpointer to unref the object, + * so we need to clear the LUT before we close all the VMA (inside + * the ppgtt). + */ lut_close(ctx); if (ctx->ppgtt) i915_ppgtt_close(&ctx->ppgtt->base); @@ -416,14 +418,43 @@ out: return ctx; } +static struct i915_gem_context * +create_kernel_context(struct drm_i915_private *i915, int prio) +{ + struct i915_gem_context *ctx; + + ctx = i915_gem_create_context(i915, NULL); + if (IS_ERR(ctx)) + return ctx; + + i915_gem_context_clear_bannable(ctx); + ctx->priority = prio; + ctx->ring_size = PAGE_SIZE; + + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + return ctx; +} + +static void +destroy_kernel_context(struct i915_gem_context **ctxp) +{ + struct i915_gem_context *ctx; + + /* Keep the context ref so that we can free it immediately ourselves */ + ctx = i915_gem_context_get(fetch_and_zero(ctxp)); + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + context_close(ctx); + i915_gem_context_free(ctx); +} + int i915_gem_contexts_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; + int err; - /* Init should only be called once per module load. Eventually the - * restriction on the context_disabled check can be loosened. 
*/ - if (WARN_ON(dev_priv->kernel_context)) - return 0; + GEM_BUG_ON(dev_priv->kernel_context); INIT_LIST_HEAD(&dev_priv->contexts.list); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); @@ -441,28 +472,38 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); - ctx = i915_gem_create_context(dev_priv, NULL); + /* lowest priority; idle task */ + ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context (error %ld)\n", - PTR_ERR(ctx)); - return PTR_ERR(ctx); + DRM_ERROR("Failed to create default global context\n"); + err = PTR_ERR(ctx); + goto err; } - - /* For easy recognisablity, we want the kernel context to be 0 and then + /* + * For easy recognisablity, we want the kernel context to be 0 and then * all user contexts will have non-zero hw_id. */ GEM_BUG_ON(ctx->hw_id); - - i915_gem_context_clear_bannable(ctx); - ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + /* highest priority; preempting task */ + ctx = create_kernel_context(dev_priv, INT_MAX); + if (IS_ERR(ctx)) { + DRM_ERROR("Failed to create default preempt context\n"); + err = PTR_ERR(ctx); + goto err_kernel_context; + } + dev_priv->preempt_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->engine[RCS]->context_size ? "logical" : "fake"); return 0; + +err_kernel_context: + destroy_kernel_context(&dev_priv->kernel_context); +err: + return err; } void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) @@ -507,15 +548,10 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) void i915_gem_contexts_fini(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; - lockdep_assert_held(&i915->drm.struct_mutex); - /* Keep the context so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - context_close(ctx); - i915_gem_context_free(ctx); + destroy_kernel_context(&i915->preempt_context); + destroy_kernel_context(&i915->kernel_context); /* Must free all deferred contexts (via flush_workqueue) first */ ida_destroy(&i915->contexts.hw_ida); @@ -1036,6 +1072,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_BANNABLE: args->value = i915_gem_context_is_bannable(ctx); break; + case I915_CONTEXT_PARAM_PRIORITY: + args->value = ctx->priority; + break; default: ret = -EINVAL; break; @@ -1091,6 +1130,26 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, else i915_gem_context_clear_bannable(ctx); break; + + case I915_CONTEXT_PARAM_PRIORITY: + { + int priority = args->value; + + if (args->size) + ret = -EINVAL; + else if (!to_i915(dev)->engine[RCS]->schedule) + ret = -ENODEV; + else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + ret = -EINVAL; + else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + ret = -EPERM; + else + ctx->priority = priority; + } + break; + default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 6176e589cf09..864439a214c8 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -256,11 +256,21 @@ struct dma_buf 
*i915_gem_prime_export(struct drm_device *dev, return drm_gem_dmabuf_export(dev, &exp_info); } -static struct sg_table * -i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { - return dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); + struct sg_table *pages; + unsigned int sg_page_sizes; + + pages = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + sg_page_sizes = i915_sg_page_sizes(pages->sgl); + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; } static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4df039ef2ce3..8daa8a78cdc0 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,21 +33,24 @@ #include "intel_drv.h" #include "i915_trace.h" -static bool ggtt_is_idle(struct drm_i915_private *dev_priv) +I915_SELFTEST_DECLARE(static struct igt_evict_ctl { + bool fail_if_busy:1; +} igt_evict_ctl;) + +static bool ggtt_is_idle(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_engine_cs *engine; + enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) { - struct intel_timeline *tl; + if (i915->gt.active_requests) + return false; - tl = &ggtt->base.timeline.engine[engine->id]; - if (i915_gem_active_isset(&tl->last_request)) - return false; - } + for_each_engine(engine, i915, id) { + if (engine->last_retired_context != i915->kernel_context) + return false; + } - return true; + return true; } static int ggtt_flush(struct drm_i915_private *i915) @@ -82,7 +85,7 @@ mark_free(struct drm_mm_scan *scan, if (i915_vma_is_pinned(vma)) return false; - if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) return false; list_add(&vma->evict_link, unwind); @@ -157,7 +160,8 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, cache_level, start, end, mode); - /* Retire before we search the active list. Although we have + /* + * Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have * a stray pin (preventing eviction) that can only be resolved by * retiring. @@ -182,7 +186,8 @@ search_again: BUG_ON(ret); } - /* Can we unpin some objects such as idle hw contents, + /* + * Can we unpin some objects such as idle hw contents, * or pending flips? But since only the GGTT has global entries * such as scanouts, rinbuffers and contexts, we can skip the * purge when inspecting per-process local address spaces. @@ -190,19 +195,36 @@ search_again: if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) return -ENOSPC; - if (ggtt_is_idle(dev_priv)) { - /* If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. - */ - return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; - } + /* + * Not everything in the GGTT is tracked via VMA using + * i915_vma_move_to_active(), otherwise we could evict as required + * with minimal stalling. 
Instead we are forced to idle the GPU and + * explicitly retire outstanding requests which will then remove + * the pinning for active objects such as contexts and ring, + * enabling us to evict them on the next iteration. + * + * To ensure that all user contexts are evictable, we perform + * a switch to the perma-pinned kernel context. This all also gives + * us a termination condition, when the last retired context is + * the kernel's there is no more we can evict. + */ + if (!ggtt_is_idle(dev_priv)) { + if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) + return -EBUSY; - ret = ggtt_flush(dev_priv); - if (ret) - return ret; + ret = ggtt_flush(dev_priv); + if (ret) + return ret; - goto search_again; + goto search_again; + } + + /* + * If we still have pending pageflip completions, drop + * back to userspace to give our workqueues time to + * acquire our locks and unpin the old scanouts. + */ + return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; found: /* drm_mm doesn't allow any other other operations while @@ -315,6 +337,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) { + ret = -ENOSPC; + break; + } + /* Overlap of objects in the same batch? */ if (i915_vma_is_pinned(vma)) { ret = -ENOSPC; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d733c4d5a500..435ed95df144 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -343,6 +343,10 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, (vma->node.start + vma->node.size - 1) >> 32) return true; + if (flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_map_and_fenceable(vma)) + return true; + return false; } @@ -367,12 +371,12 @@ eb_pin_vma(struct i915_execbuffer *eb, return false; if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - if (unlikely(i915_vma_get_fence(vma))) { + if (unlikely(i915_vma_pin_fence(vma))) { i915_vma_unpin(vma); return false; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -385,7 +389,7 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) - i915_vma_unpin_fence(vma); + __i915_vma_unpin_fence(vma); __i915_vma_unpin(vma); } @@ -563,13 +567,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, } if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - err = i915_vma_get_fence(vma); + err = i915_vma_pin_fence(vma); if (unlikely(err)) { i915_vma_unpin(vma); return err; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -974,7 +978,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, return ERR_PTR(err); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); err = drm_mm_insert_node_in_range @@ -2098,6 +2104,11 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + syncobj = drm_syncobj_find(file, fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); @@ -2105,6 +2116,9 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + 
~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 2783d63bd1ad..012250f25255 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -240,7 +240,8 @@ static int fence_update(struct drm_i915_fence_reg *fence, /* Ensure that all userspace CPU access is completed before * stealing the fence. */ - i915_gem_release_mmap(fence->vma->obj); + GEM_BUG_ON(fence->vma->fence != fence); + i915_vma_revoke_mmap(fence->vma); fence->vma->fence = NULL; fence->vma = NULL; @@ -280,8 +281,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, * * 0 on success, negative error code on failure. */ -int -i915_vma_put_fence(struct i915_vma *vma) +int i915_vma_put_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence = vma->fence; @@ -299,6 +299,8 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *fence; list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->pin_count) continue; @@ -313,7 +315,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) } /** - * i915_vma_get_fence - set up fencing for a vma + * i915_vma_pin_fence - set up fencing for a vma * @vma: vma to map through a fence reg * * When mapping objects through the GTT, userspace wants to be able to write @@ -331,10 +333,11 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) * 0 on success, negative error code on failure. */ int -i915_vma_get_fence(struct i915_vma *vma) +i915_vma_pin_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + int err; /* Note that we revoke fences on runtime suspend. Therefore the user * must keep the device awake whilst using the fence. @@ -344,6 +347,8 @@ i915_vma_get_fence(struct i915_vma *vma) /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; + GEM_BUG_ON(fence->vma != vma); + fence->pin_count++; if (!fence->dirty) { list_move_tail(&fence->link, &fence->i915->mm.fence_list); @@ -353,10 +358,25 @@ i915_vma_get_fence(struct i915_vma *vma) fence = fence_find(vma->vm->i915); if (IS_ERR(fence)) return PTR_ERR(fence); + + GEM_BUG_ON(fence->pin_count); + fence->pin_count++; } else return 0; - return fence_update(fence, set); + err = fence_update(fence, set); + if (err) + goto out_unpin; + + GEM_BUG_ON(fence->vma != set); + GEM_BUG_ON(vma->fence != (set ? fence : NULL)); + + if (set) + return 0; + +out_unpin: + fence->pin_count--; + return err; } /** @@ -429,8 +449,10 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->vma) - i915_gem_release_mmap(fence->vma->obj); + i915_vma_revoke_mmap(fence->vma); } } @@ -450,13 +472,15 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; struct i915_vma *vma = reg->vma; + GEM_BUG_ON(vma && vma->fence != reg); + /* * Commit delayed tiling changes if we have an object still * attached to the fence, otherwise just clear the fence. 
*/ if (vma && !i915_gem_object_is_tiled(vma->obj)) { GEM_BUG_ON(!reg->dirty); - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(vma)); list_move(®->link, &dev_priv->mm.fence_list); vma->fence = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4c82ceb8d318..2af65ecf2df8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -135,11 +135,12 @@ static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt) { - bool has_aliasing_ppgtt; bool has_full_ppgtt; bool has_full_48bit_ppgtt; - has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt; + if (!dev_priv->info.has_aliasing_ppgtt) + return 0; + has_full_ppgtt = dev_priv->info.has_full_ppgtt; has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt; @@ -149,9 +150,6 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv); } - if (!has_aliasing_ppgtt) - return 0; - /* * We don't allow disabling PPGTT for gen9+ as it's a requirement for * execlists, the sole mechanism available to submit work. @@ -188,7 +186,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 2; } - return has_aliasing_ppgtt ? 1 : 0; + return 1; } static int ppgtt_bind_vma(struct i915_vma *vma, @@ -205,8 +203,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma, return ret; } - vma->pages = vma->obj->mm.pages; - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -222,6 +218,30 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } +static int ppgtt_set_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->pages); + + vma->pages = vma->obj->mm.pages; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +static void clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); +} + static gen8_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level) { @@ -497,22 +517,63 @@ static void fill_page_dma_32(struct i915_address_space *vm, static int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - struct page *page; + struct page *page = NULL; dma_addr_t addr; + int order; - page = alloc_page(gfp | __GFP_ZERO); - if (unlikely(!page)) - return -ENOMEM; + /* + * In order to utilize 64K pages for an object with a size < 2M, we will + * need to support a 64K scratch page, given that every 16th entry for a + * page-table operating in 64K mode must point to a properly aligned 64K + * region, including any PTEs which happen to point to scratch. + * + * This is only relevant for the 48b PPGTT where we support + * huge-gtt-pages, see also i915_vma_insert(). 
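The allocation that follows prefers a 64K, 64K-aligned scratch chunk and silently drops back to a single 4K page when it cannot get one or when the DMA address comes back misaligned. A hedged userspace sketch of just that fallback order — grab_chunk() is a local stand-in, not alloc_pages()/dma_map_page(), and the alignment test here only mimics the DMA-address check in the patch:

        #include <stdint.h>
        #include <stdlib.h>

        #define SZ_4K  4096u
        #define SZ_64K 65536u

        /* Stand-in for alloc_pages() + dma_map_page(). */
        static void *grab_chunk(size_t size)
        {
                return aligned_alloc(size, size);
        }

        static void *setup_scratch(int want_64k, size_t *size)
        {
                void *p = NULL;

                if (want_64k) {                 /* 48b PPGTT with 64K page support */
                        p = grab_chunk(SZ_64K);
                        if (p && ((uintptr_t)p & (SZ_64K - 1))) {
                                free(p);        /* "misaligned": give it back */
                                p = NULL;
                        }
                }
                if (!p) {                       /* fall back to a single 4K page */
                        p = grab_chunk(SZ_4K);
                        if (!p)
                                return NULL;
                        *size = SZ_4K;
                } else {
                        *size = SZ_64K;
                }
                return p;
        }

        int main(void)
        {
                size_t sz;
                void *scratch = setup_scratch(1, &sz);
                int ok = scratch && sz == SZ_64K;
                free(scratch);
                return ok ? 0 : 1;
        }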
+ * + * TODO: we should really consider write-protecting the scratch-page and + * sharing between ppgtt + */ + if (i915_vm_is_48bit(vm) && + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + order = get_order(I915_GTT_PAGE_SIZE_64K); + page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order); + if (page) { + addr = dma_map_page(vm->dma, page, 0, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_pages(page, order); + page = NULL; + } - addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(vm->dma, addr))) { - __free_page(page); - return -ENOMEM; + if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) { + dma_unmap_page(vm->dma, addr, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + __free_pages(page, order); + page = NULL; + } + } + } + + if (!page) { + order = 0; + page = alloc_page(gfp | __GFP_ZERO); + if (unlikely(!page)) + return -ENOMEM; + + addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_page(page); + return -ENOMEM; + } } vm->scratch_page.page = page; vm->scratch_page.daddr = addr; + vm->scratch_page.order = order; + return 0; } @@ -520,8 +581,9 @@ static void cleanup_scratch_page(struct i915_address_space *vm) { struct i915_page_dma *p = &vm->scratch_page; - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); + dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT, + PCI_DMA_BIDIRECTIONAL); + __free_pages(p->page, p->order); } static struct i915_page_table *alloc_pt(struct i915_address_space *vm) @@ -896,10 +958,14 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, } } -struct sgt_dma { +static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; -}; +} sgt_dma(struct i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + return (struct sgt_dma) { sg, addr, addr + sg->length }; +} struct gen8_insert_pte { u16 pml4e; @@ -980,15 +1046,110 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, cache_level); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; +} + +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, + struct i915_page_directory_pointer **pdps, + struct sgt_dma *iter, + enum i915_cache_level cache_level) +{ + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + u64 start = vma->node.start; + dma_addr_t rem = iter->sg->length; + + do { + struct gen8_insert_pte idx = gen8_insert_pte(start); + struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; + struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; + unsigned int page_size; + bool maybe_64K = false; + gen8_pte_t encode = pte_encode; + gen8_pte_t *vaddr; + u16 index, max; + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { + index = idx.pde; + max = I915_PDES; + page_size = I915_GTT_PAGE_SIZE_2M; + + encode |= GEN8_PDE_PS_2M; + + vaddr = kmap_atomic_px(pd); + } else { + struct i915_page_table *pt 
= pd->page_table[idx.pde]; + + index = idx.pte; + max = GEN8_PTES; + page_size = I915_GTT_PAGE_SIZE; + + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT)) + maybe_64K = true; + + vaddr = kmap_atomic_px(pt); + } + + do { + GEM_BUG_ON(iter->sg->length < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = iter->sg->length; + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (maybe_64K && index < max && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT))) + maybe_64K = false; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < max); + + kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K && + (index == max || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[idx.pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + page_size = I915_GTT_PAGE_SIZE_64K; + } + + vma->page_sizes.gtt |= page_size; + } while (iter->sg); } static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, @@ -997,17 +1158,20 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, - &idx, cache_level)) - GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level); + } else { + struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); + + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], + &iter, &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + } } static void gen8_free_page_tables(struct i915_address_space *vm, @@ -1173,7 +1337,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, if (IS_ERR(pt)) goto unwind; - if (count < GEN8_PTES) + if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) gen8_initialize_pt(vm, pt); gen8_ppgtt_set_pde(vm, pd, pt, pde); @@ -1452,6 +1616,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->debug_dump = gen8_dump_ppgtt; return 0; @@ -1699,13 +1865,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; const u32 pte_encode = vm->pte_encode(0, 
cache_level, flags); - struct sgt_dma iter; + struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); - iter.sg = vma->pages->sgl; - iter.dma = sg_dma_address(iter.sg); - iter.max = iter.dma + iter.sg->length; do { vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); @@ -1726,6 +1889,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } } while (1); kunmap_atomic(vaddr); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -1894,6 +2059,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->debug_dump = gen6_dump_ppgtt; @@ -1933,7 +2100,7 @@ static void i915_address_space_init(struct i915_address_space *vm, INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); - pagevec_init(&vm->free_pages, false); + pagevec_init(&vm->free_pages); } static void i915_address_space_fini(struct i915_address_space *vm) @@ -1961,6 +2128,23 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + + /* + * To support 64K PTEs we need to first enable the use of the + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical + * mmio, otherwise the page-walker will simply ignore the IPS bit. This + * shouldn't be needed after GEN10. + * + * 64K pages were first introduced from BDW+, although technically they + * only *work* from gen9+. For pre-BDW we instead have the option for + * 32K pages, but we don't currently have any support for it in our + * driver. + */ + if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) && + INTEL_GEN(dev_priv) <= 10) + I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA, + I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) | + GAMW_ECO_ENABLE_64K_IPS_FIELD); } int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) @@ -2405,12 +2589,6 @@ static int ggtt_bind_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; - if (unlikely(!vma->pages)) { - int ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (obj->gt_ro) @@ -2420,6 +2598,8 @@ static int ggtt_bind_vma(struct i915_vma *vma, vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); intel_runtime_pm_put(i915); + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + /* * Without aliasing PPGTT there's no difference between * GLOBAL/LOCAL_BIND, it's all the same ptes. 
Hence unconditionally @@ -2447,12 +2627,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - if (unlikely(!vma->pages)) { - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -2467,7 +2641,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, vma->node.start, vma->size); if (ret) - goto err_pages; + return ret; } appgtt->base.insert_entries(&appgtt->base, vma, cache_level, @@ -2481,17 +2655,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } return 0; - -err_pages: - if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { - if (vma->pages != vma->obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - } - return ret; } static void aliasing_gtt_unbind_vma(struct i915_vma *vma) @@ -2529,6 +2692,21 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); } +static int ggtt_set_pages(struct i915_vma *vma) +{ + int ret; + + GEM_BUG_ON(vma->pages); + + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + static void i915_gtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -3151,6 +3329,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.cleanup = gen6_gmch_remove; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.insert_page = gen8_ggtt_insert_page; ggtt->base.clear_range = nop_clear_range; if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) @@ -3209,6 +3389,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = gen6_gmch_remove; ggtt->invalidate = gen6_ggtt_invalidate; @@ -3254,6 +3436,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = i915_gmch_remove; ggtt->invalidate = gmch_ggtt_invalidate; @@ -3403,8 +3587,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. 
*/ - list_for_each_entry_safe(obj, on, - &dev_priv->mm.bound_list, global_link) { + list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) { bool ggtt_bound = false; struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f62fb903dc24..93211a96fdad 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -42,7 +42,13 @@ #include "i915_gem_request.h" #include "i915_selftest.h" -#define I915_GTT_PAGE_SIZE 4096UL +#define I915_GTT_PAGE_SIZE_4K BIT(12) +#define I915_GTT_PAGE_SIZE_64K BIT(16) +#define I915_GTT_PAGE_SIZE_2M BIT(21) + +#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K +#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M + #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE #define I915_FENCE_REG_NONE -1 @@ -148,6 +154,9 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) #define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) +#define GEN8_PDE_IPS_64K BIT(11) +#define GEN8_PDE_PS_2M BIT(7) + struct sg_table; struct intel_rotation_info { @@ -207,6 +216,7 @@ struct i915_vma; struct i915_page_dma { struct page *page; + int order; union { dma_addr_t daddr; @@ -329,6 +339,8 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + int (*set_pages)(struct i915_vma *vma); + void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; @@ -341,6 +353,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K); +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index c1f64ddaf8aa..ee83ec838ee7 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -44,12 +44,12 @@ static void internal_free_pages(struct sg_table *st) kfree(st); } -static struct sg_table * -i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; + unsigned int sg_page_sizes; unsigned int npages; int max_order; gfp_t gfp; @@ -78,16 +78,17 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) create_st: st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return ERR_PTR(-ENOMEM); + return -ENOMEM; npages = obj->base.size / PAGE_SIZE; if (sg_alloc_table(st, npages, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = st->sgl; st->nents = 0; + sg_page_sizes = 0; do { int order = min(fls(npages) - 1, max_order); @@ -105,6 +106,7 @@ create_st: } while (1); sg_set_page(sg, page, PAGE_SIZE << order, 0); + sg_page_sizes |= PAGE_SIZE << order; st->nents++; npages -= 1 << order; @@ -132,13 +134,17 @@ create_st: * object are only valid whilst active and pinned. 
*/ obj->mm.madv = I915_MADV_DONTNEED; - return st; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; err: sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; } static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index c30d8f808185..63ce38c1cce9 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -69,7 +69,7 @@ struct drm_i915_gem_object_ops { * being released or under memory pressure (where we attempt to * reap pages for the shrinker). */ - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + int (*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); int (*pwrite)(struct drm_i915_gem_object *, @@ -114,7 +114,6 @@ struct drm_i915_gem_object { /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; - struct list_head global_link; union { struct rcu_head rcu; struct llist_node freed; @@ -123,6 +122,7 @@ struct drm_i915_gem_object { /** * Whether the object is currently in the GGTT mmap. */ + unsigned int userfault_count; struct list_head userfault_link; struct list_head batch_pool_link; @@ -160,7 +160,8 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ unsigned int bind_count; unsigned int active_count; - unsigned int pin_display; + /** Count of how many global VMA are currently pinned for use by HW */ + unsigned int pin_global; struct { struct mutex lock; /* protects the pages and their use */ @@ -169,6 +170,35 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; + /* TODO: whack some of this into the error state */ + struct i915_page_sizes { + /** + * The sg mask of the pages sg_table. i.e the mask of + * of the lengths for each sg entry. + */ + unsigned int phys; + + /** + * The gtt page sizes we are allowed to use given the + * sg mask and the supported page sizes. This will + * express the smallest unit we can use for the whole + * object, as well as the larger sizes we may be able + * to use opportunistically. + */ + unsigned int sg; + + /** + * The actual gtt page size usage. Since we can have + * multiple vma associated with this object we need to + * prevent any trampling of state, hence a copy of this + * struct also lives in each vma, therefore the gtt + * value here should only be read/write through the vma. + */ + unsigned int gtt; + } page_sizes; + + I915_SELFTEST_DECLARE(unsigned int page_mask); + struct i915_gem_object_page_iter { struct scatterlist *sg_pos; unsigned int sg_idx; /* in pages, but 32bit eek! */ @@ -178,6 +208,12 @@ struct drm_i915_gem_object { } get_page; /** + * Element within i915->mm.unbound_list or i915->mm.bound_list, + * locked by i915->mm.obj_lock. + */ + struct list_head link; + + /** * Advice: are the backing pages purgeable? 
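Aside, not part of the patch: the three masks documented above divide the bookkeeping cleanly. page_sizes.phys is the raw mask of sg segment lengths, page_sizes.sg narrows that to page sizes the platform can actually map, and page_sizes.gtt records what a given vma ended up using. The phys accumulation is nothing more than OR-ing segment lengths together, exactly like the new "sg_page_sizes |= PAGE_SIZE << order" lines in the get_pages paths; a standalone illustration with made-up segment lengths:

#include <stdio.h>

int main(void)
{
        /* Hypothetical dma-mapped segment lengths for one object. */
        unsigned int lengths[] = { 2u << 20, 64u << 10, 4u << 10 };
        unsigned int phys = 0, i;

        for (i = 0; i < sizeof(lengths) / sizeof(lengths[0]); i++)
                phys |= lengths[i];

        /* 0x200000 | 0x10000 | 0x1000 = 0x211000 */
        printf("page_sizes.phys = %#x\n", phys);
        return 0;
}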
*/ unsigned int madv:2; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 4dd4c2159a92..3703dc91eeda 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -229,7 +229,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req) return 0; /* Recreate the page after shrinking */ - if (!so->vma->obj->mm.pages) + if (!i915_gem_object_has_pages(so->vma->obj)) so->batch_offset = -1; ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 4eb1a76731b2..d140fcf5c6a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -186,7 +186,7 @@ i915_priotree_init(struct i915_priotree *pt) INIT_LIST_HEAD(&pt->signalers_list); INIT_LIST_HEAD(&pt->waiters_list); INIT_LIST_HEAD(&pt->link); - pt->priority = INT_MIN; + pt->priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) @@ -416,7 +416,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) spin_lock_irq(&request->lock); if (request->waitboost) - atomic_dec(&request->i915->rps.num_waiters); + atomic_dec(&request->i915->gt_pm.rps.num_waiters); dma_fence_signal_locked(&request->fence); spin_unlock_irq(&request->lock); @@ -556,7 +556,16 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: trace_i915_gem_request_submit(request); + /* + * We need to serialize use of the submit_request() callback with its + * hotplugging performed during an emergency i915_gem_set_wedged(). + * We use the RCU mechanism to mark the critical section in order to + * force i915_gem_set_wedged() to wait until the submit_request() is + * completed before proceeding. + */ + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); break; case FENCE_FREE: @@ -587,6 +596,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, lockdep_assert_held(&dev_priv->drm.struct_mutex); + /* + * Preempt contexts are reserved for exclusive use to inject a + * preemption context switch. They are never to be used for any trivial + * request! + */ + GEM_BUG_ON(ctx == dev_priv->preempt_context); + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. 
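Aside, not part of the patch: the rcu_read_lock()/rcu_read_unlock() pair added around engine->submit_request() above gives i915_gem_set_wedged() a way to install a nop submit hook and then block in synchronize_rcu() until every submission callback already in flight has drained. A kernel-style sketch of the pattern; demo_submit, demo_wedge and submit_hook are hypothetical names, not symbols from the patch, and the snippet only compiles in a kernel context.

#include <linux/compiler.h>
#include <linux/rcupdate.h>

static void (*submit_hook)(void *request);

static void demo_submit(void *request)
{
        rcu_read_lock();
        READ_ONCE(submit_hook)(request); /* may be swapped by demo_wedge() */
        rcu_read_unlock();
}

static void demo_wedge(void (*nop_hook)(void *request))
{
        WRITE_ONCE(submit_hook, nop_hook);
        synchronize_rcu(); /* every demo_submit() using the old hook is done */
}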
*/ diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 96eb52471dad..26249f39de67 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -30,6 +30,8 @@ #include "i915_gem.h" #include "i915_sw_fence.h" +#include <uapi/drm/i915_drm.h> + struct drm_file; struct drm_i915_gem_object; struct drm_i915_gem_request; @@ -69,9 +71,14 @@ struct i915_priotree { struct list_head waiters_list; /* those after us, they depend upon us */ struct list_head link; int priority; -#define I915_PRIORITY_MAX 1024 -#define I915_PRIORITY_NORMAL 0 -#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) +}; + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN }; struct i915_gem_capture_list { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 74002b2d1b6f..3770e3323fc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -71,25 +71,6 @@ static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) mutex_unlock(&dev_priv->drm.struct_mutex); } -static bool any_vma_pinned(struct drm_i915_gem_object *obj) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - /* Only GGTT vma may be permanently pinned, and are always - * at the start of the list. We can stop hunting as soon - * as we see a ppGTT vma. - */ - if (!i915_vma_is_ggtt(vma)) - break; - - if (i915_vma_is_pinned(vma)) - return true; - } - - return false; -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -97,9 +78,6 @@ static bool swap_available(void) static bool can_release_pages(struct drm_i915_gem_object *obj) { - if (!obj->mm.pages) - return false; - /* Consider only shrinkable ojects. */ if (!i915_gem_object_is_shrinkable(obj)) return false; @@ -115,7 +93,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count) return false; - if (any_vma_pinned(obj)) + /* If any vma are "permanently" pinned, it will prevent us from + * reclaiming the obj->mm.pages. We only allow scanout objects to claim + * a permanent pin, along with a few others like the context objects. + * To simplify the scan, and to avoid walking the list of vma under the + * object, we just check the count of its permanently pinned. + */ + if (READ_ONCE(obj->pin_global)) return false; /* We can only return physical pages to the system if we can either @@ -129,7 +113,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) { if (i915_gem_object_unbind(obj) == 0) __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); - return !READ_ONCE(obj->mm.pages); + return !i915_gem_object_has_pages(obj); } /** @@ -178,6 +162,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!shrinker_lock(dev_priv, &unlock)) return 0; + /* + * When shrinking the active list, also consider active contexts. + * Active contexts are pinned until they are retired, and so can + * not be simply unbound to retire and unpin their pages. To shrink + * the contexts, we must wait until the gpu is idle. + * + * We don't care about errors here; if we cannot wait upon the GPU, + * we will free as much as we can and hope to get a second chance. 
+ */ + if (flags & I915_SHRINK_ACTIVE) + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + trace_i915_gem_shrink(dev_priv, target, flags); i915_gem_retire_requests(dev_priv); @@ -217,15 +213,20 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, continue; INIT_LIST_HEAD(&still_in_list); + + /* + * We serialize our access to unreferenced objects through + * the use of the struct_mutex. While the objects are not + * yet freed (due to RCU then a workqueue) we still want + * to be able to shrink their pages, so they remain on + * the unbound/bound list until actually freed. + */ + spin_lock(&dev_priv->mm.obj_lock); while (count < target && (obj = list_first_entry_or_null(phase->list, typeof(*obj), - global_link))) { - list_move_tail(&obj->global_link, &still_in_list); - if (!obj->mm.pages) { - list_del_init(&obj->global_link); - continue; - } + mm.link))) { + list_move_tail(&obj->mm.link, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && obj->mm.madv != I915_MADV_DONTNEED) @@ -243,20 +244,24 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; + spin_unlock(&dev_priv->mm.obj_lock); + if (unsafe_drop_pages(obj)) { /* May arrive from get_pages on another bo */ mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); - if (!obj->mm.pages) { + if (!i915_gem_object_has_pages(obj)) { __i915_gem_object_invalidate(obj); - list_del_init(&obj->global_link); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); - scanned += obj->base.size >> PAGE_SHIFT; } + scanned += obj->base.size >> PAGE_SHIFT; + + spin_lock(&dev_priv->mm.obj_lock); } list_splice_tail(&still_in_list, phase->list); + spin_unlock(&dev_priv->mm.obj_lock); } if (flags & I915_SHRINK_BOUND) @@ -302,28 +307,39 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); struct drm_i915_gem_object *obj; - unsigned long count; - bool unlock; - - if (!shrinker_lock(dev_priv, &unlock)) - return 0; - - i915_gem_retire_requests(dev_priv); + unsigned long num_objects = 0; + unsigned long count = 0; - count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) - if (can_release_pages(obj)) + spin_lock(&i915->mm.obj_lock); + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) + if (can_release_pages(obj)) { count += obj->base.size >> PAGE_SHIFT; + num_objects++; + } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) + if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) { count += obj->base.size >> PAGE_SHIFT; - } + num_objects++; + } + spin_unlock(&i915->mm.obj_lock); - shrinker_unlock(dev_priv, unlock); + /* Update our preferred vmscan batch size for the next pass. + * Our rough guess for an effective batch size is roughly 2 + * available GEM objects worth of pages. That is we don't want + * the shrinker to fire, until it is worth the cost of freeing an + * entire GEM object. 
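Aside, not part of the patch: the batching heuristic described above, implemented just below, pulls mm.shrinker.batch toward roughly two average objects' worth of pages and never lets it drop under the 128-page vmscan default; the starting point is the 4096 set in i915_gem_shrinker_init(). Worked through with made-up numbers:

#include <stdio.h>

int main(void)
{
        unsigned long count = 25600;    /* hypothetical: 100 MiB of shrinkable 4K pages */
        unsigned long num_objects = 50; /* hypothetical object count */
        unsigned long batch = 4096;     /* initial value from i915_gem_shrinker_init() */
        unsigned long avg = 2 * count / num_objects;

        batch = (batch + avg) >> 1;
        if (batch < 128)
                batch = 128;

        /* avg = 1024, so batch moves 4096 -> 2560 on this pass. */
        printf("avg=%lu batch=%lu\n", avg, batch);
        return 0;
}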
+ */ + if (num_objects) { + unsigned long avg = 2 * count / num_objects; + + i915->mm.shrinker.batch = + max((i915->mm.shrinker.batch + avg) >> 1, + 128ul /* default SHRINK_BATCH */); + } return count; } @@ -400,10 +416,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) container_of(nb, struct drm_i915_private, mm.oom_notifier); struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; - bool unlock; - - if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) - return NOTIFY_DONE; freed_pages = i915_gem_shrink_all(dev_priv); @@ -412,26 +424,20 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) * being pointed to by hardware. */ unbound = bound = unevictable = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { - if (!obj->mm.pages) - continue; - + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else unbound += obj->base.size >> PAGE_SHIFT; } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (!obj->mm.pages) - continue; - + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else bound += obj->base.size >> PAGE_SHIFT; } - - shrinker_unlock(dev_priv, unlock); + spin_unlock(&dev_priv->mm.obj_lock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " @@ -498,6 +504,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv) dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; + dev_priv->mm.shrinker.batch = 4096; WARN_ON(register_shrinker(&dev_priv->mm.shrinker)); dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 507c9f0d8df1..03e7abc7e043 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -539,12 +539,18 @@ i915_pages_create_for_stolen(struct drm_device *dev, return st; } -static struct sg_table * -i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) { - return i915_pages_create_for_stolen(obj->base.dev, - obj->stolen->start, - obj->stolen->size); + struct sg_table *pages = + i915_pages_create_for_stolen(obj->base.dev, + obj->stolen->start, + obj->stolen->size); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); + + return 0; } static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, @@ -718,8 +724,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); return obj; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index fb5231f98c0d..1294cf695df0 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c 
@@ -269,7 +269,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * due to the change in swizzling. */ mutex_lock(&obj->mm.lock); - if (obj->mm.pages && + if (i915_gem_object_has_pages(obj) && obj->mm.madv == I915_MADV_WILLNEED && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (tiling == I915_TILING_NONE) { diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 2d4996de7331..135fc750a837 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -82,11 +82,11 @@ static void cancel_userptr(struct work_struct *work) /* We are inside a kthread context and can't be interrupted */ if (i915_gem_object_unbind(obj) == 0) __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - WARN_ONCE(obj->mm.pages, - "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n", + WARN_ONCE(i915_gem_object_has_pages(obj), + "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n", obj->bind_count, atomic_read(&obj->mm.pages_pin_count), - obj->pin_display); + obj->pin_global); mutex_unlock(&obj->base.dev->struct_mutex); @@ -164,7 +164,6 @@ static struct i915_mmu_notifier * i915_mmu_notifier_create(struct mm_struct *mm) { struct i915_mmu_notifier *mn; - int ret; mn = kmalloc(sizeof(*mn), GFP_KERNEL); if (mn == NULL) @@ -179,14 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm) return ERR_PTR(-ENOMEM); } - /* Protected by mmap_sem (write-lock) */ - ret = __mmu_notifier_register(&mn->mn, mm); - if (ret) { - destroy_workqueue(mn->wq); - kfree(mn); - return ERR_PTR(ret); - } - return mn; } @@ -210,23 +201,42 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) static struct i915_mmu_notifier * i915_mmu_notifier_find(struct i915_mm_struct *mm) { - struct i915_mmu_notifier *mn = mm->mn; + struct i915_mmu_notifier *mn; + int err = 0; mn = mm->mn; if (mn) return mn; + mn = i915_mmu_notifier_create(mm->mm); + if (IS_ERR(mn)) + err = PTR_ERR(mn); + down_write(&mm->mm->mmap_sem); mutex_lock(&mm->i915->mm_lock); - if ((mn = mm->mn) == NULL) { - mn = i915_mmu_notifier_create(mm->mm); - if (!IS_ERR(mn)) - mm->mn = mn; + if (mm->mn == NULL && !err) { + /* Protected by mmap_sem (write-lock) */ + err = __mmu_notifier_register(&mn->mn, mm->mm); + if (!err) { + /* Protected by mm_lock */ + mm->mn = fetch_and_zero(&mn); + } + } else if (mm->mn) { + /* + * Someone else raced and successfully installed the mmu + * notifier, we can cancel our own errors. + */ + err = 0; } mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - return mn; + if (mn && !IS_ERR(mn)) { + destroy_workqueue(mn->wq); + kfree(mn); + } + + return err ? 
ERR_PTR(err) : mm->mn; } static int @@ -405,6 +415,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, { unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; + unsigned int sg_page_sizes; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -434,6 +445,10 @@ alloc_table: return ERR_PTR(ret); } + sg_page_sizes = i915_sg_page_sizes(st->sgl); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + return st; } @@ -521,7 +536,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) pages = __i915_gem_userptr_alloc_pages(obj, pvec, npages); if (!IS_ERR(pages)) { - __i915_gem_object_set_pages(obj, pages); pinned = 0; pages = NULL; } @@ -533,7 +547,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } mutex_unlock(&obj->mm.lock); - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); i915_gem_object_put(obj); @@ -582,8 +596,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) return ERR_PTR(-EAGAIN); } -static struct sg_table * -i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const int num_pages = obj->base.size >> PAGE_SHIFT; struct mm_struct *mm = obj->userptr.mm->mm; @@ -612,9 +625,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (obj->userptr.work) { /* active flag should still be held for the pending work */ if (IS_ERR(obj->userptr.work)) - return ERR_CAST(obj->userptr.work); + return PTR_ERR(obj->userptr.work); else - return ERR_PTR(-EAGAIN); + return -EAGAIN; } pvec = NULL; @@ -647,10 +660,10 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) __i915_gem_userptr_set_active(obj, true); if (IS_ERR(pages)) - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); - return pages; + return PTR_ERR_OR_ZERO(pages); } static void diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c new file mode 100644 index 000000000000..e2993857df37 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -0,0 +1,74 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/pagemap.h> + +#include "i915_drv.h" +#include "i915_gemfs.h" + +int i915_gemfs_init(struct drm_i915_private *i915) +{ + struct file_system_type *type; + struct vfsmount *gemfs; + + type = get_fs_type("tmpfs"); + if (!type) + return -ENODEV; + + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); + + /* + * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most + * likely 2M. Note that within_size may overallocate huge-pages, if say + * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under + * memory pressure shmem should split any huge-pages which can be + * shrunk. + */ + + if (has_transparent_hugepage()) { + struct super_block *sb = gemfs->mnt_sb; + char options[] = "huge=within_size"; + int flags = 0; + int err; + + err = sb->s_op->remount_fs(sb, &flags, options); + if (err) { + kern_unmount(gemfs); + return err; + } + } + + i915->mm.gemfs = gemfs; + + return 0; +} + +void i915_gemfs_fini(struct drm_i915_private *i915) +{ + kern_unmount(i915->mm.gemfs); +} diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h new file mode 100644 index 000000000000..cca8bdc5b93e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
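Aside, not part of the patch: i915_gemfs_init() above kern_mount()s a private tmpfs instance and, when transparent huge pages are available, remounts it with "huge=within_size" so shmem may back large objects with 2M pages yet still split them under memory pressure. A rough user-space analogue of that mount request; the mount point is hypothetical, and the call needs CAP_SYS_ADMIN on a THP-enabled kernel.

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* /mnt/gemfs-demo must already exist; any scratch directory will do. */
        if (mount("tmpfs", "/mnt/gemfs-demo", "tmpfs", 0, "huge=within_size")) {
                perror("mount");
                return 1;
        }
        puts("tmpfs mounted with huge=within_size");
        return 0;
}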
+ * + */ + +#ifndef __I915_GEMFS_H__ +#define __I915_GEMFS_H__ + +struct drm_i915_private; + +int i915_gemfs_init(struct drm_i915_private *i915); + +void i915_gemfs_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c14552ab270b..653fb69e7ecb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -377,9 +377,9 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, + erq->context, erq->seqno, erq->priority, jiffies_to_msecs(jiffies - erq->jiffies), erq->head, erq->tail); } @@ -388,9 +388,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->ban_score, ctx->guilty, ctx->active); + ctx->priority, ctx->ban_score, ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, @@ -1271,6 +1271,7 @@ static void record_request(struct drm_i915_gem_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; + erq->priority = request->priotree.priority; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; @@ -1364,6 +1365,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; + e->priority = ctx->priority; e->ban_score = atomic_read(&ctx->ban_score); e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); @@ -1672,8 +1674,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { error->awake = dev_priv->gt.awake; - error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); - error->suspended = dev_priv->pm.suspended; + error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); + error->suspended = dev_priv->runtime_pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 04f1281d81a5..f84c267728fd 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -21,12 +21,13 @@ * IN THE SOFTWARE. * */ -#include <linux/circ_buf.h> -#include "i915_drv.h" -#include "intel_uc.h" +#include <linux/circ_buf.h> #include <trace/events/dma_fence.h> +#include "i915_guc_submission.h" +#include "i915_drv.h" + /** * DOC: GuC-based command submission * @@ -337,7 +338,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, for_each_engine_masked(engine, dev_priv, client->engines, tmp) { struct intel_context *ce = &ctx->engine[engine->id]; - uint32_t guc_engine_id = engine->guc_id; + u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. 
Only when we @@ -387,13 +388,13 @@ static void guc_stage_desc_init(struct intel_guc *guc, gfx_addr = guc_ggtt_offset(client->vma); desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); + desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); desc->db_trigger_uk = gfx_addr + client->doorbell_offset; desc->process_desc = gfx_addr + client->proc_desc_offset; desc->wq_addr = gfx_addr + GUC_DB_SIZE; desc->wq_size = GUC_WQ_SIZE; - desc->desc_private = (uintptr_t)client; + desc->desc_private = ptr_to_u64(client); } static void guc_stage_desc_fini(struct intel_guc *guc, @@ -499,7 +500,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine) const unsigned int engine_id = engine->id; unsigned int n; - for (n = 0; n < ARRAY_SIZE(execlists->port); n++) { + for (n = 0; n < execlists_num_ports(execlists); n++) { struct drm_i915_gem_request *rq; unsigned int count; @@ -609,6 +610,7 @@ done: execlists->first = rb; if (submit) { port_assign(port, last); + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); i915_guc_submit(engine); } spin_unlock_irq(&engine->timeline->lock); @@ -632,6 +634,8 @@ static void i915_guc_irq_handler(unsigned long data) rq = port_request(&port[0]); } + if (!rq) + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); if (!port_isset(last_port)) i915_guc_dequeue(engine); @@ -643,48 +647,6 @@ static void i915_guc_irq_handler(unsigned long data) * path of i915_guc_submit() above. */ -/** - * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage - * @guc: the guc - * @size: size of area to allocate (both virtual space and memory) - * - * This is a wrapper to create an object for use with the GuC. In order to - * use it inside the GuC, an object needs to be pinned lifetime, so we allocate - * both some backing storage and a range inside the Global GTT. We must pin - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that - * range is reserved inside GuC. - * - * Return: A i915_vma if successful, otherwise an ERR_PTR. 
- */ -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) - goto err; - - ret = i915_vma_pin(vma, 0, PAGE_SIZE, - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (ret) { - vma = ERR_PTR(ret); - goto err; - } - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - /* Check that a doorbell register is in the expected state */ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) { @@ -796,8 +758,8 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) */ static struct i915_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, - uint32_t engines, - uint32_t priority, + u32 engines, + u32 priority, struct i915_gem_context *ctx) { struct i915_guc_client *client; @@ -1069,6 +1031,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) static void guc_interrupts_capture(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; @@ -1105,12 +1068,13 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv) * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will * result in the register bit being left SET! */ - dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; - dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } static void guc_interrupts_release(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; @@ -1129,8 +1093,8 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) I915_WRITE(GUC_VCS2_VCS1_IER, 0); I915_WRITE(GUC_WD_VECS_IER, 0); - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } int i915_guc_submission_enable(struct drm_i915_private *dev_priv) @@ -1212,55 +1176,3 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) guc_client_free(guc->execbuf_client); guc->execbuf_client = NULL; } - -/** - * intel_guc_suspend() - notify GuC entering suspend state - * @dev_priv: i915 device private - */ -int intel_guc_suspend(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - gen9_disable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; - /* any value greater than GUC_POWER_D0 */ - data[1] = GUC_POWER_D1; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} - -/** - * intel_guc_resume() - notify GuC resuming from suspend state - * @dev_priv: i915 device private - */ -int intel_guc_resume(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != 
INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - if (i915_modparams.guc_log_level >= 0) - gen9_enable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; - data[1] = GUC_POWER_D0; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/i915_guc_submission.h new file mode 100644 index 000000000000..cb4353b59059 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_guc_submission.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _I915_GUC_SUBMISSION_H_ +#define _I915_GUC_SUBMISSION_H_ + +#include <linux/spinlock.h> + +#include "i915_gem.h" + +struct drm_i915_private; + +/* + * This structure primarily describes the GEM object shared with the GuC. + * The specs sometimes refer to this object as a "GuC context", but we use + * the term "client" to avoid confusion with hardware contexts. This + * GEM object is held for the entire lifetime of our interaction with + * the GuC, being allocated before the GuC is loaded with its firmware. + * Because there's no way to update the address used by the GuC after + * initialisation, the shared object must stay pinned into the GGTT as + * long as the GuC is in use. We also keep the first page (only) mapped + * into kernel address space, as it includes shared data that must be + * updated on every request submission. + * + * The single GEM object described here is actually made up of several + * separate areas, as far as the GuC is concerned. The first page (kept + * kmap'd) includes the "process descriptor" which holds sequence data for + * the doorbell, and one cacheline which actually *is* the doorbell; a + * write to this will "ring the doorbell" (i.e. send an interrupt to the + * GuC). The subsequent pages of the client object constitute the work + * queue (a circular array of work items), again described in the process + * descriptor. Work queue pages are mapped momentarily as required. 
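Aside, not part of the patch: read as a memory map, the client object described above is a single contiguous GEM buffer in which the first page carries the process descriptor plus the one cacheline that is the doorbell, and every following page belongs to the circular work queue. A purely illustrative layout with assumed sizes and names; in the driver the real positions come from proc_desc_offset and doorbell_offset at runtime.

#include <stdint.h>

#define DEMO_PAGE_SIZE 4096
#define DEMO_CACHELINE 64
#define DEMO_WQ_PAGES  4        /* hypothetical work-queue size */

struct demo_guc_client_layout {
        /* Page 0: process descriptor, then one cacheline used as the doorbell. */
        uint8_t proc_desc[DEMO_PAGE_SIZE - DEMO_CACHELINE];
        uint8_t doorbell[DEMO_CACHELINE];
        /* Pages 1..N: circular array of work items, i.e. the work queue. */
        uint8_t work_queue[DEMO_WQ_PAGES * DEMO_PAGE_SIZE];
};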
+ */ +struct i915_guc_client { + struct i915_vma *vma; + void *vaddr; + struct i915_gem_context *owner; + struct intel_guc *guc; + + /* bitmap of (host) engine ids */ + u32 engines; + u32 priority; + u32 stage_id; + u32 proc_desc_offset; + + u16 doorbell_id; + unsigned long doorbell_offset; + + spinlock_t wq_lock; + /* Per-engine counts of GuC submissions */ + u64 submissions[I915_NUM_ENGINES]; +}; + +int i915_guc_submission_init(struct drm_i915_private *dev_priv); +int i915_guc_submission_enable(struct drm_i915_private *dev_priv); +void i915_guc_submission_disable(struct drm_i915_private *dev_priv); +void i915_guc_submission_fini(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index efd7827ff181..f8205841868b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -404,19 +404,21 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) { spin_lock_irq(&dev_priv->irq_lock); gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events); - dev_priv->rps.pm_iir = 0; + dev_priv->gt_pm.rps.pm_iir = 0; spin_unlock_irq(&dev_priv->irq_lock); } void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - WARN_ON_ONCE(dev_priv->rps.pm_iir); + WARN_ON_ONCE(rps->pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); - dev_priv->rps.interrupts_enabled = true; + rps->interrupts_enabled = true; gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); @@ -424,11 +426,13 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (!READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (!READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.interrupts_enabled = false; + rps->interrupts_enabled = false; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); @@ -442,7 +446,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) * we will reset the GPU to minimum frequencies, so the current * state of the worker can be discarded. 
*/ - cancel_work_sync(&dev_priv->rps.work); + cancel_work_sync(&rps->work); gen6_reset_rps_interrupts(dev_priv); } @@ -1119,12 +1123,13 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) { - memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); + memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); } static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { - const struct intel_rps_ei *prev = &dev_priv->rps.ei; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + const struct intel_rps_ei *prev = &rps->ei; struct intel_rps_ei now; u32 events = 0; @@ -1151,28 +1156,29 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) c0 = max(render, media); c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - if (c0 > time * dev_priv->rps.up_threshold) + if (c0 > time * rps->up_threshold) events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * dev_priv->rps.down_threshold) + else if (c0 < time * rps->down_threshold) events = GEN6_PM_RP_DOWN_THRESHOLD; } - dev_priv->rps.ei = now; + rps->ei = now; return events; } static void gen6_pm_rps_work(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, rps.work); + container_of(work, struct drm_i915_private, gt_pm.rps.work); + struct intel_rps *rps = &dev_priv->gt_pm.rps; bool client_boost = false; int new_delay, adj, min, max; u32 pm_iir = 0; spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) { - pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir); - client_boost = atomic_read(&dev_priv->rps.num_waiters); + if (rps->interrupts_enabled) { + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); } spin_unlock_irq(&dev_priv->irq_lock); @@ -1181,18 +1187,18 @@ static void gen6_pm_rps_work(struct work_struct *work) if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) goto out; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - adj = dev_priv->rps.last_adj; - new_delay = dev_priv->rps.cur_freq; - min = dev_priv->rps.min_freq_softlimit; - max = dev_priv->rps.max_freq_softlimit; + adj = rps->last_adj; + new_delay = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; if (client_boost) - max = dev_priv->rps.max_freq; - if (client_boost && new_delay < dev_priv->rps.boost_freq) { - new_delay = dev_priv->rps.boost_freq; + max = rps->max_freq; + if (client_boost && new_delay < rps->boost_freq) { + new_delay = rps->boost_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) @@ -1200,15 +1206,15 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? 
2 : 1; - if (new_delay >= dev_priv->rps.max_freq_softlimit) + if (new_delay >= rps->max_freq_softlimit) adj = 0; } else if (client_boost) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) - new_delay = dev_priv->rps.efficient_freq; - else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) - new_delay = dev_priv->rps.min_freq_softlimit; + if (rps->cur_freq > rps->efficient_freq) + new_delay = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_delay = rps->min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { if (adj < 0) @@ -1216,13 +1222,13 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; - if (new_delay <= dev_priv->rps.min_freq_softlimit) + if (new_delay <= rps->min_freq_softlimit) adj = 0; } else { /* unknown event */ adj = 0; } - dev_priv->rps.last_adj = adj; + rps->last_adj = adj; /* sysfs frequency interfaces may have snuck in while servicing the * interrupt @@ -1232,15 +1238,15 @@ static void gen6_pm_rps_work(struct work_struct *work) if (intel_set_rps(dev_priv, new_delay)) { DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - dev_priv->rps.last_adj = 0; + rps->last_adj = 0; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) + if (rps->interrupts_enabled) gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); } @@ -1382,7 +1388,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - if (port_count(&execlists->port[0])) { + if (READ_ONCE(engine->execlists.active)) { __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); tasklet = true; } @@ -1723,12 +1729,14 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, * the work queue. 
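Aside, not part of the patch: beyond the dev_priv->rps to gt_pm.rps relocation, the stepping policy in gen6_pm_rps_work() is easy to miss in the diff noise. Each consecutive up (or down) threshold interrupt doubles the previous adjustment, the step resets to +/-1 (or +/-2 on Cherryview, which needs even encodings) when the direction changes, and the result is clamped to the frequency softlimits. Simplified here to just the up direction, with clamping and the Cherryview case left out:

#include <stdio.h>

int main(void)
{
        int freq = 6, adj = 0, i;

        /* Three back-to-back "up threshold" events: steps of 1, 2, 4. */
        for (i = 0; i < 3; i++) {
                adj = adj > 0 ? adj * 2 : 1;
                freq += adj;
        }
        printf("frequency index after three up events: %d\n", freq); /* 13 */
        return 0;
}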
*/ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + if (pm_iir & dev_priv->pm_rps_events) { spin_lock(&dev_priv->irq_lock); gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); - if (dev_priv->rps.interrupts_enabled) { - dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; - schedule_work(&dev_priv->rps.work); + if (rps->interrupts_enabled) { + rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; + schedule_work(&rps->work); } spin_unlock(&dev_priv->irq_lock); } @@ -2254,18 +2262,14 @@ static void ivb_err_int_handler(struct drm_i915_private *dev_priv) static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) { u32 serr_int = I915_READ(SERR_INT); + enum pipe pipe; if (serr_int & SERR_INT_POISON) DRM_ERROR("PCH poison interrupt\n"); - if (serr_int & SERR_INT_TRANS_A_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_A); - - if (serr_int & SERR_INT_TRANS_B_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_B); - - if (serr_int & SERR_INT_TRANS_C_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_C); + for_each_pipe(dev_priv, pipe) + if (serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pipe)) + intel_pch_fifo_underrun_irq_handler(dev_priv, pipe); I915_WRITE(SERR_INT, serr_int); } @@ -3163,10 +3167,17 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], ~dev_priv->de_irq_mask[pipe] | extra_ier); + spin_unlock_irq(&dev_priv->irq_lock); } @@ -3176,8 +3187,15 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); + spin_unlock_irq(&dev_priv->irq_lock); /* make sure we're done processing display irqs */ @@ -3598,16 +3616,15 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; - dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked; + for_each_pipe(dev_priv, pipe) { + dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; - for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], de_pipe_enables); + } GEN3_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); GEN3_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); @@ -4000,11 +4017,12 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; intel_hpd_init_work(dev_priv); - INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); + INIT_WORK(&rps->work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) @@ -4020,7 +4038,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; - dev_priv->rps.pm_intrmsk_mbz = 0; + 
rps->pm_intrmsk_mbz = 0; /* * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer @@ -4029,10 +4047,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) * TODO: verify if this can be reproduced on VLV,CHV. */ if (INTEL_GEN(dev_priv) <= 7) - dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; if (INTEL_GEN(dev_priv) >= 8) - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; if (IS_GEN2(dev_priv)) { /* Gen2 doesn't have a hardware frame counter */ @@ -4166,7 +4184,7 @@ int intel_irq_install(struct drm_i915_private *dev_priv) * interrupts as enabled _before_ actually enabling them to avoid * special cases in our ordering checks. */ - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; return drm_irq_install(&dev_priv->drm, dev_priv->drm.pdev->irq); } @@ -4182,7 +4200,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) { drm_irq_uninstall(&dev_priv->drm); intel_hpd_cancel_work(dev_priv); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; } /** @@ -4195,7 +4213,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) { dev_priv->drm.driver->irq_uninstall(&dev_priv->drm); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; synchronize_irq(dev_priv->drm.irq); } @@ -4208,7 +4226,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) { - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; dev_priv->drm.driver->irq_preinstall(&dev_priv->drm); dev_priv->drm.driver->irq_postinstall(&dev_priv->drm); } diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9dff323a83d3..b4faeb6aa2bd 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -146,9 +146,6 @@ i915_param_named(disable_display, bool, 0400, i915_param_named_unsafe(enable_cmd_parser, bool, 0400, "Enable command parsing (true=enabled [default], false=disabled)"); -i915_param_named_unsafe(use_mmio_flip, int, 0600, - "use MMIO flips (-1=never, 0=driver discretion [default], 1=always)"); - i915_param_named(mmio_debug, int, 0600, "Enable the MMIO debug code for the first N failures (default: off). 
" "This may negatively affect performance."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 4f3f8d650194..c7292268ed43 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -49,7 +49,6 @@ param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ - param(int, use_mmio_flip, 0) \ param(int, mmio_debug, 0) \ param(int, edp_vswing, 0) \ param(int, reset, 2) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index da60866b6628..6458c309c039 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -54,8 +54,14 @@ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } +#define GLK_COLORS \ + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } /* Keep in gen based order, and chronological order within a gen */ + +#define GEN_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K + #define GEN2_FEATURES \ .gen = 2, .num_pipes = 1, \ .has_overlay = 1, .overlay_needs_physical = 1, \ @@ -65,6 +71,7 @@ .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i830_info __initconst = { @@ -98,6 +105,7 @@ static const struct intel_device_info intel_i865g_info __initconst = { .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i915g_info __initconst = { @@ -161,6 +169,7 @@ static const struct intel_device_info intel_pineview_info __initconst = { .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i965g_info __initconst = { @@ -184,7 +193,6 @@ static const struct intel_device_info intel_i965gm_info __initconst = { static const struct intel_device_info intel_g45_info __initconst = { GEN4_FEATURES, .platform = INTEL_G45, - .has_pipe_cxsr = 1, .ring_mask = RENDER_RING | BSD_RING, }; @@ -192,7 +200,6 @@ static const struct intel_device_info intel_gm45_info __initconst = { GEN4_FEATURES, .platform = INTEL_GM45, .is_mobile = 1, .has_fbc = 1, - .has_pipe_cxsr = 1, .supports_tv = 1, .ring_mask = RENDER_RING | BSD_RING, }; @@ -203,6 +210,7 @@ static const struct intel_device_info intel_gm45_info __initconst = { .ring_mask = RENDER_RING | BSD_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_ironlake_d_info __initconst = { @@ -226,6 +234,7 @@ static const struct intel_device_info intel_ironlake_m_info __initconst = { .has_rc6p = 1, \ .has_aliasing_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS #define SNB_D_PLATFORM \ @@ -269,6 +278,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info __initconst = .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ IVB_CURSOR_OFFSETS #define IVB_D_PLATFORM \ @@ -325,11 +335,12 @@ static const struct intel_device_info intel_valleyview_info __initconst = { .has_snoop = true, .ring_mask = RENDER_RING | BSD_RING | BLT_RING, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_PIPEOFFSETS, CURSOR_OFFSETS }; -#define HSW_FEATURES \ +#define G75_FEATURES \ 
GEN7_FEATURES, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ .has_ddi = 1, \ @@ -341,7 +352,7 @@ static const struct intel_device_info intel_valleyview_info __initconst = { .has_runtime_pm = 1 #define HSW_PLATFORM \ - HSW_FEATURES, \ + G75_FEATURES, \ .platform = INTEL_HASWELL, \ .has_l3_dpf = 1 @@ -360,16 +371,18 @@ static const struct intel_device_info intel_haswell_gt3_info __initconst = { .gt = 3, }; -#define BDW_FEATURES \ - HSW_FEATURES, \ +#define GEN8_FEATURES \ + G75_FEATURES, \ BDW_COLORS, \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_2M, \ .has_logical_ring_contexts = 1, \ .has_full_48bit_ppgtt = 1, \ .has_64bit_reloc = 1, \ .has_reset_engine = 1 #define BDW_PLATFORM \ - BDW_FEATURES, \ + GEN8_FEATURES, \ .gen = 8, \ .platform = INTEL_BROADWELL @@ -415,19 +428,31 @@ static const struct intel_device_info intel_cherryview_info __initconst = { .has_reset_engine = 1, .has_snoop = true, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_CHV_PIPEOFFSETS, CURSOR_OFFSETS, CHV_COLORS, }; -#define SKL_PLATFORM \ - BDW_FEATURES, \ - .gen = 9, \ - .platform = INTEL_SKYLAKE, \ +#define GEN9_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M + +#define GEN9_FEATURES \ + GEN8_FEATURES, \ + GEN9_DEFAULT_PAGE_SIZES, \ + .has_logical_ring_preemption = 1, \ .has_csr = 1, \ .has_guc = 1, \ + .has_ipc = 1, \ .ddb_size = 896 +#define SKL_PLATFORM \ + GEN9_FEATURES, \ + .gen = 9, \ + .platform = INTEL_SKYLAKE + static const struct intel_device_info intel_skylake_gt1_info __initconst = { SKL_PLATFORM, .gt = 1, @@ -463,6 +488,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_ddi = 1, \ .has_fpga_dbg = 1, \ .has_fbc = 1, \ + .has_psr = 1, \ .has_runtime_pm = 1, \ .has_pooled_eu = 0, \ .has_csr = 1, \ @@ -470,6 +496,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_rc6 = 1, \ .has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ + .has_logical_ring_preemption = 1, \ .has_guc = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ @@ -477,6 +504,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_reset_engine = 1, \ .has_snoop = true, \ .has_ipc = 1, \ + GEN9_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_PIPEOFFSETS, \ IVB_CURSOR_OFFSETS, \ BDW_COLORS @@ -491,17 +519,13 @@ static const struct intel_device_info intel_geminilake_info __initconst = { GEN9_LP_FEATURES, .platform = INTEL_GEMINILAKE, .ddb_size = 1024, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + GLK_COLORS, }; #define KBL_PLATFORM \ - BDW_FEATURES, \ + GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_KABYLAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .has_ipc = 1, \ - .ddb_size = 896 + .platform = INTEL_KABYLAKE static const struct intel_device_info intel_kabylake_gt1_info __initconst = { KBL_PLATFORM, @@ -520,13 +544,9 @@ static const struct intel_device_info intel_kabylake_gt3_info __initconst = { }; #define CFL_PLATFORM \ - BDW_FEATURES, \ + GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_COFFEELAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .has_ipc = 1, \ - .ddb_size = 896 + .platform = INTEL_COFFEELAKE static const struct intel_device_info intel_coffeelake_gt1_info __initconst = { CFL_PLATFORM, @@ -544,16 +564,17 @@ static const struct intel_device_info intel_coffeelake_gt3_info __initconst = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; +#define GEN10_FEATURES \ + 
GEN9_FEATURES, \ + .ddb_size = 1024, \ + GLK_COLORS + static const struct intel_device_info intel_cannonlake_gt2_info __initconst = { - BDW_FEATURES, + GEN10_FEATURES, .is_alpha_support = 1, .platform = INTEL_CANNONLAKE, .gen = 10, .gt = 2, - .ddb_size = 1024, - .has_csr = 1, - .has_ipc = 1, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; /* @@ -622,7 +643,7 @@ static void i915_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); i915_driver_unload(dev); - drm_dev_unref(dev); + drm_dev_put(dev); } static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1383a2995a69..59ee808f8fd9 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2537,6 +2537,10 @@ static const struct file_operations fops = { .poll = i915_perf_poll, .read = i915_perf_read, .unlocked_ioctl = i915_perf_ioctl, + /* Our ioctl have no arguments, so it's safe to use the same function + * to handle 32bits compatibility. + */ + .compat_ioctl = i915_perf_ioctl, }; diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index 0679a58cdbae..195203f298df 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -53,6 +53,7 @@ enum vgt_g2v_type { * VGT capabilities type */ #define VGT_CAPS_FULL_48BIT_PPGTT BIT(2) +#define VGT_CAPS_HWSP_EMULATION BIT(3) struct vgt_if { u64 magic; /* VGT_MAGIC */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ee0d4f14ac98..68a58cce6ab1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2371,6 +2371,9 @@ enum i915_power_well_id { #define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0) #define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18) +#define GEN8_GAMW_ECO_DEV_RW_IA _MMIO(0x4080) +#define GAMW_ECO_ENABLE_64K_IPS_FIELD 0xF + #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24) @@ -3819,6 +3822,16 @@ enum { #define PWM2_GATING_DIS (1 << 14) #define PWM1_GATING_DIS (1 << 13) +#define _CLKGATE_DIS_PSL_A 0x46520 +#define _CLKGATE_DIS_PSL_B 0x46524 +#define _CLKGATE_DIS_PSL_C 0x46528 +#define DPF_GATING_DIS (1 << 10) +#define DPF_RAM_GATING_DIS (1 << 9) +#define DPFR_GATING_DIS (1 << 8) + +#define CLKGATE_DIS_PSL(pipe) \ + _MMIO_PIPE(pipe, _CLKGATE_DIS_PSL_A, _CLKGATE_DIS_PSL_B) + /* * GEN10 clock gating regs */ @@ -5229,7 +5242,7 @@ enum { #define DP_AUX_CH_CTL_TIME_OUT_400us (0 << 26) #define DP_AUX_CH_CTL_TIME_OUT_600us (1 << 26) #define DP_AUX_CH_CTL_TIME_OUT_800us (2 << 26) -#define DP_AUX_CH_CTL_TIME_OUT_1600us (3 << 26) +#define DP_AUX_CH_CTL_TIME_OUT_MAX (3 << 26) /* Varies per platform */ #define DP_AUX_CH_CTL_TIME_OUT_MASK (3 << 26) #define DP_AUX_CH_CTL_RECEIVE_ERROR (1 << 25) #define DP_AUX_CH_CTL_MESSAGE_SIZE_MASK (0x1f << 20) @@ -5671,8 +5684,7 @@ enum { #define CBR_PWM_CLOCK_MUX_SELECT (1<<30) #define CBR4_VLV _MMIO(VLV_DISPLAY_BASE + 0x70450) -#define CBR_DPLLBMD_PIPE_C (1<<29) -#define CBR_DPLLBMD_PIPE_B (1<<18) +#define CBR_DPLLBMD_PIPE(pipe) (1<<(7+(pipe)*11)) /* pipes B and C */ /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 @@ -6993,6 +7005,12 @@ enum { #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) +#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1<<0) +#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) 
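The CBR4_VLV change above folds two hard-coded bit names into a pipe-parameterized macro, CBR_DPLLBMD_PIPE(pipe) = 1 << (7 + pipe*11), which reproduces the old PIPE_B (bit 18) and PIPE_C (bit 29) values; a later SERR_INT hunk applies the same idea with a stride of 3. A small standalone check of the arithmetic, assuming the usual pipe numbering A=0, B=1, C=2:

#include <stdio.h>

enum pipe { PIPE_A, PIPE_B, PIPE_C };

/* same shape as the new macro: base bit 7 plus an 11-bit stride per pipe */
#define CBR_DPLLBMD_PIPE(pipe)  (1u << (7 + (pipe) * 11))

int main(void)
{
        /* expect 1<<18 for pipe B and 1<<29 for pipe C, matching the
         * per-pipe defines that the macro replaces */
        printf("pipe B: 0x%08x (old define 0x%08x)\n",
               CBR_DPLLBMD_PIPE(PIPE_B), 1u << 18);
        printf("pipe C: 0x%08x (old define 0x%08x)\n",
               CBR_DPLLBMD_PIPE(PIPE_C), 1u << 29);
        return 0;
}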
(((hi) << 2) | ((lo) << 1)) +#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0) +#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1) +#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) +#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) @@ -7023,6 +7041,7 @@ enum { */ #define L3_GENERAL_PRIO_CREDITS(x) (((x) >> 1) << 19) #define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14) +#define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14)) #define GEN7_L3CNTLREG1 _MMIO(0xB01C) #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C @@ -7164,9 +7183,6 @@ enum { #define SERR_INT _MMIO(0xc4040) #define SERR_INT_POISON (1<<31) -#define SERR_INT_TRANS_C_FIFO_UNDERRUN (1<<6) -#define SERR_INT_TRANS_B_FIFO_UNDERRUN (1<<3) -#define SERR_INT_TRANS_A_FIFO_UNDERRUN (1<<0) #define SERR_INT_TRANS_FIFO_UNDERRUN(pipe) (1<<((pipe)*3)) /* digital port hotplug */ diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5c86925a0294..8f3aa4dc0c98 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -108,8 +108,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_restore_fences(dev_priv); - if (IS_GEN4(dev_priv)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 808ea4d5b962..e8ca67a129d2 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -41,6 +41,11 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) debug_object_init(fence, &i915_sw_fence_debug_descr); } +static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +{ + debug_object_init_on_stack(fence, &i915_sw_fence_debug_descr); +} + static inline void debug_fence_activate(struct i915_sw_fence *fence) { debug_object_activate(fence, &i915_sw_fence_debug_descr); @@ -79,6 +84,10 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) { } +static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +{ +} + static inline void debug_fence_activate(struct i915_sw_fence *fence) { } @@ -360,9 +369,9 @@ struct i915_sw_dma_fence_cb { struct irq_work work; }; -static void timer_i915_sw_fence_wake(unsigned long data) +static void timer_i915_sw_fence_wake(struct timer_list *t) { - struct i915_sw_dma_fence_cb *cb = (struct i915_sw_dma_fence_cb *)data; + struct i915_sw_dma_fence_cb *cb = from_timer(cb, t, timer); struct i915_sw_fence *fence; fence = xchg(&cb->fence, NULL); @@ -425,9 +434,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, i915_sw_fence_await(fence); cb->dma = NULL; - __setup_timer(&cb->timer, - timer_i915_sw_fence_wake, (unsigned long)cb, - TIMER_IRQSAFE); + timer_setup(&cb->timer, timer_i915_sw_fence_wake, TIMER_IRQSAFE); init_irq_work(&cb->work, irq_i915_sw_fence_work); if (timeout) { cb->dma = dma_fence_get(dma); @@ -507,5 +514,6 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/lib_sw_fence.c" #include "selftests/i915_sw_fence.c" #endif diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d61c8727f756..791759f632e1 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -49,7 +49,7 @@ static u32 
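The i915_sw_fence.c hunk above converts the old setup_timer() callback, which received an unsigned long cookie, to the timer_setup()/from_timer() interface where the callback gets the timer_list pointer and recovers its containing structure; the intel_breadcrumbs.c hunk later in this section does the same. A minimal kernel-style sketch of the pattern, using invented names rather than the driver's structures:

#include <linux/jiffies.h>
#include <linux/timer.h>

struct my_watchdog {
        struct timer_list timer;
        bool fired;
};

/* the callback receives the timer itself; container_of() is hidden in from_timer() */
static void my_watchdog_expired(struct timer_list *t)
{
        struct my_watchdog *wd = from_timer(wd, t, timer);

        wd->fired = true;
}

static void my_watchdog_start(struct my_watchdog *wd)
{
        wd->fired = false;
        timer_setup(&wd->timer, my_watchdog_expired, 0);
        mod_timer(&wd->timer, jiffies + HZ);    /* fire in about one second */
}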
calc_residency(struct drm_i915_private *dev_priv, static ssize_t show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%x\n", intel_enable_rc6()); + return snprintf(buf, PAGE_SIZE, "%x\n", intel_rc6_enabled()); } static ssize_t @@ -246,7 +246,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 freq; freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); @@ -261,7 +261,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; ret = intel_gpu_freq(dev_priv, ret); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -275,7 +275,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.cur_freq)); + dev_priv->gt_pm.rps.cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -284,7 +284,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.boost_freq)); + dev_priv->gt_pm.rps.boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -292,6 +292,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -301,12 +302,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, /* Validate against (static) hardware limits */ val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) + if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; - mutex_lock(&dev_priv->rps.hw_lock); - dev_priv->rps.boost_freq = val; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + rps->boost_freq = val; + mutex_unlock(&dev_priv->pcu_lock); return count; } @@ -318,7 +319,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.efficient_freq)); + dev_priv->gt_pm.rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -327,7 +328,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.max_freq_softlimit)); + dev_priv->gt_pm.rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -335,6 +336,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -344,34 +346,34 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < rps->min_freq || + val > rps->max_freq || + 
val < rps->min_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - if (val > dev_priv->rps.rp0_freq) + if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", intel_gpu_freq(dev_priv, val)); - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -384,7 +386,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.min_freq_softlimit)); + dev_priv->gt_pm.rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -392,6 +394,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -401,30 +404,30 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < rps->min_freq || + val > rps->max_freq || + val > rps->max_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. 
*/ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -448,14 +451,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp0_freq); + val = intel_gpu_freq(dev_priv, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq); + val = intel_gpu_freq(dev_priv, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq); + val = intel_gpu_freq(dev_priv, rps->min_freq); else BUG(); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 92f4c5bb7aa7..4e76768ffa95 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _I915_TRACE_H_ @@ -345,7 +346,7 @@ TRACE_EVENT(i915_gem_object_create, TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -353,7 +354,7 @@ TRACE_EVENT(i915_gem_object_create, __entry->size = obj->base.size; ), - TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) + TP_printk("obj=%p, size=0x%llx", __entry->obj, __entry->size) ); TRACE_EVENT(i915_gem_shrink, @@ -384,7 +385,7 @@ TRACE_EVENT(i915_vma_bind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) __field(unsigned, flags) ), @@ -396,7 +397,7 @@ TRACE_EVENT(i915_vma_bind, __entry->flags = flags; ), - TP_printk("obj=%p, offset=%016llx size=%x%s vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx%s vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->flags & PIN_MAPPABLE ? 
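The i915_sysfs.c hunks above move the frequency-store paths from rps.hw_lock to the renamed pcu_lock and take one local pointer to the relocated gt_pm.rps state per function, which keeps the limit checks and the re-clamp of cur_freq readable. A compact userspace-style sketch of that shape, with a pthread mutex standing in for the kernel lock and all names illustrative:

#include <pthread.h>

struct rps_state {
        int min_freq, max_freq;
        int min_freq_softlimit, max_freq_softlimit;
        int cur_freq;
};

struct device_priv {
        pthread_mutex_t pcu_lock;
        struct { struct rps_state rps; } gt_pm;
};

static int clamp_int(int val, int lo, int hi)
{
        return val < lo ? lo : (val > hi ? hi : val);
}

/* mirrors the gt_max_freq_mhz_store() flow: validate, store, re-clamp */
static int set_max_freq(struct device_priv *priv, int val)
{
        struct rps_state *rps = &priv->gt_pm.rps;       /* local alias, as in the patch */
        int ret = 0;

        pthread_mutex_lock(&priv->pcu_lock);
        if (val < rps->min_freq || val > rps->max_freq ||
            val < rps->min_freq_softlimit) {
                ret = -1;                               /* -EINVAL in the driver */
        } else {
                rps->max_freq_softlimit = val;
                rps->cur_freq = clamp_int(rps->cur_freq,
                                          rps->min_freq_softlimit,
                                          rps->max_freq_softlimit);
        }
        pthread_mutex_unlock(&priv->pcu_lock);
        return ret;
}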
", mappable" : "", __entry->vm) @@ -410,7 +411,7 @@ TRACE_EVENT(i915_vma_unbind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -420,18 +421,18 @@ TRACE_EVENT(i915_vma_unbind, __entry->size = vma->node.size; ), - TP_printk("obj=%p, offset=%016llx size=%x vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->vm) ); TRACE_EVENT(i915_gem_object_pwrite, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -440,18 +441,18 @@ TRACE_EVENT(i915_gem_object_pwrite, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_pread, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -460,17 +461,17 @@ TRACE_EVENT(i915_gem_object_pread, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_fault, - TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write), + TP_PROTO(struct drm_i915_gem_object *obj, u64 index, bool gtt, bool write), TP_ARGS(obj, index, gtt, write), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, index) + __field(u64, index) __field(bool, gtt) __field(bool, write) ), @@ -482,7 +483,7 @@ TRACE_EVENT(i915_gem_object_fault, __entry->write = write; ), - TP_printk("obj=%p, %s index=%u %s", + TP_printk("obj=%p, %s index=%llu %s", __entry->obj, __entry->gtt ? "GTT" : "CPU", __entry->index, @@ -515,14 +516,14 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), + TP_PROTO(struct i915_address_space *vm, u64 size, u64 align, unsigned int flags), TP_ARGS(vm, size, align, flags), TP_STRUCT__entry( __field(u32, dev) __field(struct i915_address_space *, vm) - __field(u32, size) - __field(u32, align) + __field(u64, size) + __field(u64, align) __field(unsigned int, flags) ), @@ -534,43 +535,11 @@ TRACE_EVENT(i915_gem_evict, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", + TP_printk("dev=%d, vm=%p, size=0x%llx, align=0x%llx %s", __entry->dev, __entry->vm, __entry->size, __entry->align, __entry->flags & PIN_MAPPABLE ? 
", mappable" : "") ); -TRACE_EVENT(i915_gem_evict_everything, - TP_PROTO(struct drm_device *dev), - TP_ARGS(dev), - - TP_STRUCT__entry( - __field(u32, dev) - ), - - TP_fast_assign( - __entry->dev = dev->primary->index; - ), - - TP_printk("dev=%d", __entry->dev) -); - -TRACE_EVENT(i915_gem_evict_vm, - TP_PROTO(struct i915_address_space *vm), - TP_ARGS(vm), - - TP_STRUCT__entry( - __field(u32, dev) - __field(struct i915_address_space *, vm) - ), - - TP_fast_assign( - __entry->dev = vm->i915->drm.primary->index; - __entry->vm = vm; - ), - - TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) -); - TRACE_EVENT(i915_gem_evict_node, TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags), TP_ARGS(vm, node, flags), @@ -593,12 +562,29 @@ TRACE_EVENT(i915_gem_evict_node, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, start=%llx size=%llx, color=%lx, flags=%x", + TP_printk("dev=%d, vm=%p, start=0x%llx size=0x%llx, color=0x%lx, flags=%x", __entry->dev, __entry->vm, __entry->start, __entry->size, __entry->color, __entry->flags) ); +TRACE_EVENT(i915_gem_evict_vm, + TP_PROTO(struct i915_address_space *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(u32, dev) + __field(struct i915_address_space *, vm) + ), + + TP_fast_assign( + __entry->dev = vm->i915->drm.primary->index; + __entry->vm = vm; + ), + + TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) +); + TRACE_EVENT(i915_gem_ring_sync_to, TP_PROTO(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from), @@ -649,29 +635,6 @@ TRACE_EVENT(i915_gem_request_queue, __entry->flags) ); -TRACE_EVENT(i915_gem_ring_flush, - TP_PROTO(struct drm_i915_gem_request *req, u32 invalidate, u32 flush), - TP_ARGS(req, invalidate, flush), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, ring) - __field(u32, invalidate) - __field(u32, flush) - ), - - TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->ring = req->engine->id; - __entry->invalidate = invalidate; - __entry->flush = flush; - ), - - TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x", - __entry->dev, __entry->ring, - __entry->invalidate, __entry->flush) -); - DECLARE_EVENT_CLASS(i915_gem_request, TP_PROTO(struct drm_i915_gem_request *req), TP_ARGS(req), diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c index f1df2bd4ecf4..463a7177997c 100644 --- a/drivers/gpu/drm/i915/i915_trace_points.c +++ b/drivers/gpu/drm/i915/i915_trace_points.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright © 2009 Intel Corporation * diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 12fc250b47b9..af3d7cc53fa1 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -99,6 +99,11 @@ __T; \ }) +static inline u64 ptr_to_u64(const void *ptr) +{ + return (uintptr_t)ptr; +} + #define u64_to_ptr(T, x) ({ \ typecheck(u64, x); \ (T *)(uintptr_t)(x); \ @@ -119,4 +124,17 @@ static inline void __list_del_many(struct list_head *head, WRITE_ONCE(head->next, first); } +/* + * Wait until the work is finally complete, even if it tries to postpone + * by requeueing itself. Note, that if the worker never cancels itself, + * we will spin forever. 
+ */ +static inline void drain_delayed_work(struct delayed_work *dw) +{ + do { + while (flush_delayed_work(dw)) + ; + } while (delayed_work_pending(dw)); +} + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd2956b70..bb8338450dc1 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -30,6 +30,12 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +static inline bool +intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 02d1a5eacb00..fbfab2f33023 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -54,12 +54,21 @@ i915_vma_retire(struct i915_gem_active *active, if (--obj->active_count) return; + /* Prune the shared fence arrays iff completely idle (inc. external) */ + if (reservation_object_trylock(obj->resv)) { + if (reservation_object_test_signaled_rcu(obj->resv, true)) + reservation_object_add_excl_fence(obj->resv, NULL); + reservation_object_unlock(obj->resv); + } + /* Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ + spin_lock(&rq->i915->mm.obj_lock); if (obj->bind_count) - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); + list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list); + spin_unlock(&rq->i915->mm.obj_lock); obj->mm.dirty = true; /* be paranoid */ @@ -266,6 +275,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; + GEM_BUG_ON(!vma->pages); + trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) @@ -278,13 +289,16 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; + int err; /* Access through the GTT requires the device to be awake. 
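The new drain_delayed_work() helper in the i915_utils.h hunk above keeps calling flush_delayed_work() until the work is no longer pending, so a self-requeueing worker is fully quiesced; as its comment warns, the caller must first make the worker stop rearming itself. A hedged kernel-style usage sketch with an invented worker, assuming the helper's header is included:

#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct poller {
        struct delayed_work work;
        bool keep_running;
};

static void poller_fn(struct work_struct *w)
{
        struct poller *p = container_of(to_delayed_work(w), struct poller, work);

        if (READ_ONCE(p->keep_running))
                schedule_delayed_work(&p->work, HZ);    /* requeues itself */
}

static void poller_stop(struct poller *p)
{
        WRITE_ONCE(p->keep_running, false);     /* stop the requeueing first... */
        drain_delayed_work(&p->work);           /* ...then wait until truly idle */
}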
*/ assert_rpm_wakelock_held(vma->vm->i915); lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) - return IO_ERR_PTR(-ENODEV); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + err = -ENODEV; + goto err; + } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); @@ -294,14 +308,36 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, vma->node.start, vma->node.size); - if (ptr == NULL) - return IO_ERR_PTR(-ENOMEM); + if (ptr == NULL) { + err = -ENOMEM; + goto err; + } vma->iomap = ptr; } __i915_vma_pin(vma); + + err = i915_vma_pin_fence(vma); + if (err) + goto err_unpin; + return ptr; + +err_unpin: + __i915_vma_unpin(vma); +err: + return IO_ERR_PTR(err); +} + +void i915_vma_unpin_iomap(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->obj->base.dev->struct_mutex); + + GEM_BUG_ON(vma->iomap == NULL); + + i915_vma_unpin_fence(vma); + i915_vma_unpin(vma); } void i915_vma_unpin_and_release(struct i915_vma **p_vma) @@ -471,25 +507,64 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (ret) return ret; + GEM_BUG_ON(vma->pages); + + ret = vma->vm->set_pages(vma); + if (ret) + goto err_unpin; + if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || range_overflows(offset, size, end)) { ret = -EINVAL; - goto err_unpin; + goto err_clear; } ret = i915_gem_gtt_reserve(vma->vm, &vma->node, size, offset, obj->cache_level, flags); if (ret) - goto err_unpin; + goto err_clear; } else { + /* + * We only support huge gtt pages through the 48b PPGTT, + * however we also don't want to force any alignment for + * objects which need to be tightly packed into the low 32bits. + * + * Note that we assume that GGTT are limited to 4GiB for the + * forseeable future. See also i915_ggtt_offset(). + */ + if (upper_32_bits(end - 1) && + vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + /* + * We can't mix 64K and 4K PTEs in the same page-table + * (2M block), and so to avoid the ugliness and + * complexity of coloring we opt for just aligning 64K + * objects to 2M. + */ + u64 page_alignment = + rounddown_pow_of_two(vma->page_sizes.sg | + I915_GTT_PAGE_SIZE_2M); + + /* + * Check we don't expand for the limited Global GTT + * (mappable aperture is even more precious!). This + * also checks that we exclude the aliasing-ppgtt. 
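i915_vma_pin_iomap() above now also pins the fence and unwinds through err_unpin/err labels on failure, with a matching out-of-line i915_vma_unpin_iomap() that releases fence and pin in reverse order. A short generic sketch of that acquire-in-order, release-in-reverse shape; the resources and names are placeholders:

struct mapping { int pinned; int fenced; void *ptr; };

static int acquire_pin(struct mapping *m)    { m->pinned = 1; return 0; }
static int acquire_fence(struct mapping *m)  { m->fenced = 1; return 0; }
static void release_fence(struct mapping *m) { m->fenced = 0; }
static void release_pin(struct mapping *m)   { m->pinned = 0; }

static void *map_with_fence(struct mapping *m)
{
        int err;

        err = acquire_pin(m);
        if (err)
                goto err;

        err = acquire_fence(m);
        if (err)
                goto err_unpin;         /* undo only what already succeeded */

        return m->ptr;

err_unpin:
        release_pin(m);
err:
        return NULL;                    /* the driver returns IO_ERR_PTR(err) here */
}

static void unmap_with_fence(struct mapping *m)
{
        release_fence(m);               /* reverse order of acquisition */
        release_pin(m);
}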
+ */ + GEM_BUG_ON(i915_vma_is_ggtt(vma)); + + alignment = max(alignment, page_alignment); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); + } + ret = i915_gem_gtt_insert(vma->vm, &vma->node, size, alignment, obj->cache_level, start, end, flags); if (ret) - goto err_unpin; + goto err_clear; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); @@ -497,13 +572,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); return 0; +err_clear: + vma->vm->clear_pages(vma); err_unpin: i915_gem_object_unpin_pages(obj); return ret; @@ -512,20 +593,24 @@ err_unpin: static void i915_vma_remove(struct i915_vma *vma) { + struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + vma->vm->clear_pages(vma); + drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ + spin_lock(&i915->mm.obj_lock); if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); + list_move_tail(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be @@ -569,8 +654,8 @@ int __i915_vma_do_pin(struct i915_vma *vma, err_remove: if ((bound & I915_VMA_BIND_MASK) == 0) { - GEM_BUG_ON(vma->pages); i915_vma_remove(vma); + GEM_BUG_ON(vma->pages); } err_unpin: __i915_vma_unpin(vma); @@ -620,6 +705,30 @@ static void __i915_vma_iounmap(struct i915_vma *vma) vma->iomap = NULL; } +void i915_vma_revoke_mmap(struct i915_vma *vma) +{ + struct drm_vma_offset_node *node = &vma->obj->base.vma_node; + u64 vma_offset; + + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if (!i915_vma_has_userfault(vma)) + return; + + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + GEM_BUG_ON(!vma->obj->userfault_count); + + vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, + drm_vma_node_offset_addr(node) + vma_offset, + vma->size, + 1); + + i915_vma_unset_userfault(vma); + if (!--vma->obj->userfault_count) + list_del(&vma->obj->userfault_link); +} + int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -683,11 +792,13 @@ int i915_vma_unbind(struct i915_vma *vma) return ret; /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); + i915_vma_revoke_mmap(vma); __i915_vma_iounmap(vma); vma->flags &= ~I915_VMA_CAN_FENCE; } + GEM_BUG_ON(vma->fence); + GEM_BUG_ON(i915_vma_has_userfault(vma)); if (likely(!vma->vm->closed)) { trace_i915_vma_unbind(vma); @@ -695,13 +806,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - if (vma->pages != 
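The i915_vma_insert() hunk above avoids mixing 64K and 4K PTEs in one 2M page-table block by raising the placement alignment to the largest page size present (capped at 2M) and rounding 64K-backed objects up to a 2M multiple. A standalone check of that arithmetic; the helpers mimic the kernel's rounddown_pow_of_two()/round_up() and the constants mirror the GTT page sizes:

#include <stdint.h>
#include <stdio.h>

#define SZ_4K   (1ull << 12)
#define SZ_64K  (1ull << 16)
#define SZ_2M   (1ull << 21)

/* largest power of two <= x (x must be non-zero), like the kernel helper */
static uint64_t rounddown_pow_of_two(uint64_t x)
{
        while (x & (x - 1))
                x &= x - 1;
        return x;
}

static uint64_t round_up_to(uint64_t x, uint64_t align)
{
        return (x + align - 1) & ~(align - 1);
}

int main(void)
{
        uint64_t sg_page_sizes = SZ_64K | SZ_4K; /* backing store mixes 64K and 4K */
        uint64_t alignment = rounddown_pow_of_two(sg_page_sizes | SZ_2M);
        uint64_t size = round_up_to(3 * SZ_64K + SZ_4K, SZ_2M);

        /* expect a 2M alignment and a 2M-rounded size */
        printf("alignment=0x%llx size=0x%llx\n",
               (unsigned long long)alignment, (unsigned long long)size);
        return 0;
}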
obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - i915_vma_remove(vma); destroy: diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e811067c7724..1e2bc9b3c3ac 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,6 +55,7 @@ struct i915_vma { void __iomem *iomap; u64 size; u64 display_alignment; + struct i915_page_sizes page_sizes; u32 fence_size; u32 fence_alignment; @@ -65,7 +66,7 @@ struct i915_vma { * that exist in the ctx->handle_vmas LUT for this vma. */ unsigned int open_count; - unsigned int flags; + unsigned long flags; /** * How many users have pinned this object in GTT space. The following * users can each hold at most one reference: pwrite/pread, execbuffer @@ -87,6 +88,8 @@ struct i915_vma { #define I915_VMA_GGTT BIT(8) #define I915_VMA_CAN_FENCE BIT(9) #define I915_VMA_CLOSED BIT(10) +#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -145,6 +148,22 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) return vma->flags & I915_VMA_CLOSED; } +static inline bool i915_vma_set_userfault(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline void i915_vma_unset_userfault(struct i915_vma *vma) +{ + return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline bool i915_vma_has_userfault(const struct i915_vma *vma) +{ + return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) { return vma->active; @@ -243,6 +262,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); +void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); @@ -321,12 +341,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * Callers must hold the struct_mutex. This function is only valid to be * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). */ -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); - i915_vma_unpin(vma); -} +void i915_vma_unpin_iomap(struct i915_vma *vma); static inline struct page *i915_vma_first_page(struct i915_vma *vma) { @@ -349,15 +364,13 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) * * True if the vma has a fence, false otherwise. 
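The i915_vma.h hunk above widens vma->flags to unsigned long and adds a USERFAULT bit managed with the generic bitops, so i915_vma_revoke_mmap() can tell whether a VMA still has a live userspace mapping to tear down. A stripped-down kernel-style sketch with a stand-in structure and bit number:

#include <linux/bitops.h>
#include <linux/types.h>

#define MY_USERFAULT_BIT        11

struct my_vma {
        unsigned long flags;    /* unsigned long so the bitops apply */
};

/* returns the previous value, mirroring i915_vma_set_userfault() */
static bool my_vma_set_userfault(struct my_vma *vma)
{
        return __test_and_set_bit(MY_USERFAULT_BIT, &vma->flags);
}

static void my_vma_unset_userfault(struct my_vma *vma)
{
        __clear_bit(MY_USERFAULT_BIT, &vma->flags);
}

static bool my_vma_has_userfault(const struct my_vma *vma)
{
        return test_bit(MY_USERFAULT_BIT, &vma->flags);
}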
*/ -static inline bool -i915_vma_pin_fence(struct i915_vma *vma) +int i915_vma_pin_fence(struct i915_vma *vma); +int __must_check i915_vma_put_fence(struct i915_vma *vma); + +static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - vma->fence->pin_count++; - return true; - } else - return false; + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; } /** @@ -372,10 +385,8 @@ static inline void i915_vma_unpin_fence(struct i915_vma *vma) { lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; - } + if (vma->fence) + __i915_vma_unpin_fence(vma); } #endif diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index 42fb436f6cdc..d1abf4bb7c81 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Intel ACPI functions * diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 27743be5b768..0ddba16fde1b 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -754,7 +754,7 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv, { struct intel_encoder *encoder; - if (WARN_ON(pipe >= I915_MAX_PIPES)) + if (WARN_ON(pipe >= INTEL_INFO(dev_priv)->num_pipes)) return NULL; /* MST */ diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index b881ce8596ee..fd23023df7c1 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -431,27 +431,6 @@ parse_general_features(struct drm_i915_private *dev_priv, dev_priv->vbt.fdi_rx_polarity_inverted); } -static void -parse_general_definitions(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) -{ - const struct bdb_general_definitions *general; - - general = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (general) { - u16 block_size = get_blocksize(general); - if (block_size >= sizeof(*general)) { - int bus_pin = general->crt_ddc_gmbus_pin; - DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); - if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) - dev_priv->vbt.crt_ddc_pin = bus_pin; - } else { - DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n", - block_size); - } - } -} - static const struct child_device_config * child_device_ptr(const struct bdb_general_definitions *defs, int i) { @@ -459,41 +438,24 @@ child_device_ptr(const struct bdb_general_definitions *defs, int i) } static void -parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version) { struct sdvo_device_mapping *mapping; - const struct bdb_general_definitions *defs; const struct child_device_config *child; - int i, child_device_num, count; - u16 block_size; - - defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (!defs) { - DRM_DEBUG_KMS("No general definition block is found, unable to construct sdvo mapping.\n"); - return; - } + int i, count = 0; /* - * Only parse SDVO mappings when the general definitions block child - * device size matches that of the *legacy* child device config - * struct. Thus, SDVO mapping will be skipped for newer VBT. + * Only parse SDVO mappings on gens that could have SDVO. This isn't + * accurate and doesn't have to be, as long as it's not too strict. 
*/ - if (defs->child_dev_size != LEGACY_CHILD_DEVICE_CONFIG_SIZE) { - DRM_DEBUG_KMS("Unsupported child device size for SDVO mapping.\n"); + if (!IS_GEN(dev_priv, 3, 7)) { + DRM_DEBUG_KMS("Skipping SDVO device mapping\n"); return; } - /* get the block size of general definitions */ - block_size = get_blocksize(defs); - /* get the number of child device */ - child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; - count = 0; - for (i = 0; i < child_device_num; i++) { - child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ - continue; - } + + for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) { + child = dev_priv->vbt.child_dev + i; + if (child->slave_addr != SLAVE_ADDR1 && child->slave_addr != SLAVE_ADDR2) { /* @@ -544,7 +506,6 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, /* No SDVO device info is found */ DRM_DEBUG_KMS("No SDVO device info is found in VBT\n"); } - return; } static void @@ -730,6 +691,48 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) dev_priv->vbt.psr.tp2_tp3_wakeup_time = psr_table->tp2_tp3_wakeup_time; } +static void parse_dsi_backlight_ports(struct drm_i915_private *dev_priv, + u16 version, enum port port) +{ + if (!dev_priv->vbt.dsi.config->dual_link || version < 197) { + dev_priv->vbt.dsi.bl_ports = BIT(port); + if (dev_priv->vbt.dsi.config->cabc_supported) + dev_priv->vbt.dsi.cabc_ports = BIT(port); + + return; + } + + switch (dev_priv->vbt.dsi.config->dl_dcs_backlight_ports) { + case DL_DCS_PORT_A: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_C); + break; + default: + case DL_DCS_PORT_A_AND_C: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C); + break; + } + + if (!dev_priv->vbt.dsi.config->cabc_supported) + return; + + switch (dev_priv->vbt.dsi.config->dl_dcs_cabc_ports) { + case DL_DCS_PORT_A: + dev_priv->vbt.dsi.cabc_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: + dev_priv->vbt.dsi.cabc_ports = BIT(PORT_C); + break; + default: + case DL_DCS_PORT_A_AND_C: + dev_priv->vbt.dsi.cabc_ports = + BIT(PORT_A) | BIT(PORT_C); + break; + } +} + static void parse_mipi_config(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) @@ -738,9 +741,10 @@ parse_mipi_config(struct drm_i915_private *dev_priv, const struct mipi_config *config; const struct mipi_pps_data *pps; int panel_type = dev_priv->vbt.panel_type; + enum port port; /* parse MIPI blocks only if LFP type is MIPI */ - if (!intel_bios_is_dsi_present(dev_priv, NULL)) + if (!intel_bios_is_dsi_present(dev_priv, &port)) return; /* Initialize this to undefined indicating no generic MIPI support */ @@ -781,15 +785,7 @@ parse_mipi_config(struct drm_i915_private *dev_priv, return; } - /* - * These fields are introduced from the VBT version 197 onwards, - * so making sure that these bits are set zero in the previous - * versions. - */ - if (dev_priv->vbt.dsi.config->dual_link && bdb->version < 197) { - dev_priv->vbt.dsi.config->dl_dcs_cabc_ports = 0; - dev_priv->vbt.dsi.config->dl_dcs_backlight_ports = 0; - } + parse_dsi_backlight_ports(dev_priv, bdb->version, port); /* We have mandatory mipi config blocks. 
Initialize as generic panel */ dev_priv->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID; @@ -1110,8 +1106,24 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, } } +static const u8 cnp_ddc_pin_map[] = { + [DDC_BUS_DDI_B] = GMBUS_PIN_1_BXT, + [DDC_BUS_DDI_C] = GMBUS_PIN_2_BXT, + [DDC_BUS_DDI_D] = GMBUS_PIN_4_CNP, /* sic */ + [DDC_BUS_DDI_F] = GMBUS_PIN_3_BXT, /* sic */ +}; + +static u8 map_ddc_pin(struct drm_i915_private *dev_priv, u8 vbt_pin) +{ + if (HAS_PCH_CNP(dev_priv) && + vbt_pin > 0 && vbt_pin < ARRAY_SIZE(cnp_ddc_pin_map)) + return cnp_ddc_pin_map[vbt_pin]; + + return vbt_pin; +} + static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, - const struct bdb_header *bdb) + u8 bdb_version) { struct child_device_config *it, *child = NULL; struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; @@ -1170,6 +1182,13 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, is_hdmi = false; } + if (port == PORT_A && is_dvi) { + DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", + is_hdmi ? "/HDMI" : ""); + is_dvi = false; + is_hdmi = false; + } + info->supports_dvi = is_dvi; info->supports_hdmi = is_hdmi; info->supports_dp = is_dp; @@ -1195,16 +1214,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { - info->alternate_ddc_pin = ddc_pin; - - /* - * All VBTs that we got so far for B Stepping has this - * information wrong for Port D. So, let's just ignore for now. - */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0) && - port == PORT_D) { - info->alternate_ddc_pin = 0; - } + info->alternate_ddc_pin = map_ddc_pin(dev_priv, ddc_pin); sanitize_ddc_pin(dev_priv, port); } @@ -1215,7 +1225,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, sanitize_aux_ch(dev_priv, port); } - if (bdb->version >= 158) { + if (bdb_version >= 158) { /* The VBT HDMI level shift values match the table we have. 
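The cnp_ddc_pin_map[] hunk above translates the VBT's DDC pin numbering into the driver's GMBUS pin numbering through a small bounds-checked lookup table, passing unknown values through unchanged. A self-contained sketch of the lookup shape; the table entries here are placeholders, not the real CNP mapping:

#include <stdio.h>

static const unsigned char ddc_pin_map[] = {
        [1] = 0x1,      /* hypothetical: VBT pin 1 -> GMBUS pin 1 */
        [2] = 0x2,
        [3] = 0x4,      /* out-of-order entries are exactly why a table is used */
        [4] = 0x3,
};

static unsigned char map_ddc_pin(unsigned char vbt_pin)
{
        if (vbt_pin > 0 && vbt_pin < sizeof(ddc_pin_map) / sizeof(ddc_pin_map[0]))
                return ddc_pin_map[vbt_pin];
        return vbt_pin;         /* unknown or zero: pass through unchanged */
}

int main(void)
{
        printf("vbt pin 3 -> gmbus pin %u\n", map_ddc_pin(3));
        printf("vbt pin 9 -> gmbus pin %u\n", map_ddc_pin(9));
        return 0;
}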
*/ hdmi_level_shift = child->hdmi_level_shifter_value; DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", @@ -1225,7 +1235,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } /* Parse the I_boost config for SKL and above */ - if (bdb->version >= 196 && child->iboost) { + if (bdb_version >= 196 && child->iboost) { info->dp_boost_level = translate_iboost(child->dp_iboost_level); DRM_DEBUG_KMS("VBT (e)DP boost level for port %c: %d\n", port_name(port), info->dp_boost_level); @@ -1235,40 +1245,52 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } } -static void parse_ddi_ports(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) { enum port port; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; if (!dev_priv->vbt.child_dev_num) return; - if (bdb->version < 155) + if (bdb_version < 155) return; for (port = PORT_A; port < I915_MAX_PORTS; port++) - parse_ddi_port(dev_priv, port, bdb); + parse_ddi_port(dev_priv, port, bdb_version); } static void -parse_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_general_definitions(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) { const struct bdb_general_definitions *defs; const struct child_device_config *child; - struct child_device_config *child_dev_ptr; int i, child_device_num, count; u8 expected_size; u16 block_size; + int bus_pin; defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); if (!defs) { DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n"); return; } + + block_size = get_blocksize(defs); + if (block_size < sizeof(*defs)) { + DRM_DEBUG_KMS("General definitions block too small (%u)\n", + block_size); + return; + } + + bus_pin = defs->crt_ddc_gmbus_pin; + DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); + if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) + dev_priv->vbt.crt_ddc_pin = bus_pin; + if (bdb->version < 106) { expected_size = 22; } else if (bdb->version < 111) { @@ -1298,18 +1320,14 @@ parse_device_mapping(struct drm_i915_private *dev_priv, return; } - /* get the block size of general definitions */ - block_size = get_blocksize(defs); /* get the number of child device */ child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; count = 0; /* get the number of child device that is present */ for (i = 0; i < child_device_num; i++) { child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ + if (!child->device_type) continue; - } count++; } if (!count) { @@ -1326,36 +1344,18 @@ parse_device_mapping(struct drm_i915_private *dev_priv, count = 0; for (i = 0; i < child_device_num; i++) { child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ + if (!child->device_type) continue; - } - - child_dev_ptr = dev_priv->vbt.child_dev + count; - count++; /* * Copy as much as we know (sizeof) and is available * (child_dev_size) of the child device. Accessing the data must * depend on VBT version. 
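The intel_bios.c restructuring in these hunks copies the child devices out of the BDB once, in parse_general_definitions(), and then runs parse_sdvo_device_mapping() and parse_ddi_ports() purely over the cached dev_priv->vbt.child_dev array plus the BDB version, instead of re-walking the raw block in each parser. A simplified sketch of that two-phase flow with invented, stand-in structures:

#include <stdlib.h>
#include <string.h>

struct child_config {
        unsigned short device_type;
        unsigned char raw[34];
};

struct vbt_data {
        int child_dev_num;
        struct child_config *child_dev;
};

/* phase 1: copy out only the populated child devices, clamped to the
 * smaller of what the firmware provides and what we understand */
static int cache_child_devices(struct vbt_data *vbt, const unsigned char *blob,
                               size_t child_size, int num)
{
        int i, count = 0;

        vbt->child_dev = calloc(num, sizeof(*vbt->child_dev));
        if (!vbt->child_dev)
                return -1;

        for (i = 0; i < num; i++) {
                struct child_config child = { 0 };

                memcpy(&child, blob + i * child_size,
                       child_size < sizeof(child) ? child_size : sizeof(child));
                if (!child.device_type)
                        continue;       /* skip empty slots */
                vbt->child_dev[count++] = child;
        }
        vbt->child_dev_num = count;
        return 0;
}

/* phase 2: later parsers need only the cached copy and the BDB version */
static void parse_ports(const struct vbt_data *vbt, unsigned char bdb_version)
{
        int i;

        if (bdb_version < 155)
                return;
        for (i = 0; i < vbt->child_dev_num; i++) {
                /* inspect vbt->child_dev[i] here */
        }
}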
*/ - memcpy(child_dev_ptr, child, + memcpy(dev_priv->vbt.child_dev + count, child, min_t(size_t, defs->child_dev_size, sizeof(*child))); - - /* - * copied full block, now init values when they are not - * available in current version - */ - if (bdb->version < 196) { - /* Set default values for bits added from v196 */ - child_dev_ptr->iboost = 0; - child_dev_ptr->hpd_invert = 0; - } - - if (bdb->version < 192) - child_dev_ptr->lspcon = 0; + count++; } - return; } /* Common defaults which may be overridden by VBT. */ @@ -1536,14 +1536,15 @@ void intel_bios_init(struct drm_i915_private *dev_priv) parse_lfp_panel_data(dev_priv, bdb); parse_lfp_backlight(dev_priv, bdb); parse_sdvo_panel_data(dev_priv, bdb); - parse_sdvo_device_mapping(dev_priv, bdb); - parse_device_mapping(dev_priv, bdb); parse_driver_features(dev_priv, bdb); parse_edp(dev_priv, bdb); parse_psr(dev_priv, bdb); parse_mipi_config(dev_priv, bdb); parse_mipi_sequence(dev_priv, bdb); - parse_ddi_ports(dev_priv, bdb); + + /* Further processing on pre-parsed data */ + parse_sdvo_device_mapping(dev_priv, bdb->version); + parse_ddi_ports(dev_priv, bdb->version); out: if (!vbt) { diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 29c62d481cef..48e1ba01ccf8 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -74,9 +74,10 @@ static noinline void missed_breadcrumb(struct intel_engine_cs *engine) set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } -static void intel_breadcrumbs_hangcheck(unsigned long data) +static void intel_breadcrumbs_hangcheck(struct timer_list *t) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_engine_cs *engine = from_timer(engine, t, + breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; if (!b->irq_armed) @@ -108,9 +109,10 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) } } -static void intel_breadcrumbs_fake_irq(unsigned long data) +static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_engine_cs *engine = from_timer(engine, t, + breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The timer persists in case we cannot enable interrupts, @@ -787,12 +789,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) spin_lock_init(&b->rb_lock); spin_lock_init(&b->irq_lock); - setup_timer(&b->fake_irq, - intel_breadcrumbs_fake_irq, - (unsigned long)engine); - setup_timer(&b->hangcheck, - intel_breadcrumbs_hangcheck, - (unsigned long)engine); + timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); + timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); /* Spawn a thread to provide a common bottom-half for all signals. 
* As this is an asynchronous interface we cannot steal the current diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 87fc42b19336..b2a6d62b71c0 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -503,7 +503,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, else cmd = 0; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); @@ -513,7 +513,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); mutex_lock(&dev_priv->sb_lock); @@ -590,7 +590,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); @@ -600,7 +600,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -656,10 +656,10 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, "trying to change cdclk frequency with cdclk not enabled\n")) return; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("failed to inform pcode about cdclk change\n"); return; @@ -712,9 +712,9 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -928,12 +928,12 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, WARN_ON((cdclk == 24000) != (vco == 0)); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -975,9 +975,9 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } @@ -1268,10 +1268,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, } /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); + 
mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", @@ -1300,10 +1300,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; I915_WRITE(CDCLK_CTL, val); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", @@ -1518,12 +1518,12 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider, pcu_ack; int ret; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1575,9 +1575,9 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_CTL, val); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index ff9ecd211abb..b8315bca852b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -74,7 +74,7 @@ #define I9XX_CSC_COEFF_1_0 \ ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) -static bool crtc_state_is_legacy(struct drm_crtc_state *state) +static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) { return !state->degamma_lut && !state->ctm && @@ -288,7 +288,7 @@ static void cherryview_load_csc_matrix(struct drm_crtc_state *state) } mode = (state->ctm ? CGM_PIPE_MODE_CSC : 0); - if (!crtc_state_is_legacy(state)) { + if (!crtc_state_is_legacy_gamma(state)) { mode |= (state->degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | (state->gamma_lut ? CGM_PIPE_MODE_GAMMA : 0); } @@ -469,7 +469,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(state->crtc)->pipe; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -529,7 +529,7 @@ static void glk_load_luts(struct drm_crtc_state *state) glk_load_degamma_lut(state); - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -551,7 +551,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) uint32_t i, lut_size; uint32_t word0, word1; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { /* Turn off degamma/gamma on CGM block. */ I915_WRITE(CGM_PIPE_MODE(pipe), (state->ctm ? CGM_PIPE_MODE_CSC : 0)); @@ -632,12 +632,10 @@ int intel_color_check(struct drm_crtc *crtc, return 0; /* - * We also allow no degamma lut and a gamma lut at the legacy + * We also allow no degamma lut/ctm and a gamma lut at the legacy * size (256 entries). 
*/ - if (!crtc_state->degamma_lut && - crtc_state->gamma_lut && - crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH) + if (crtc_state_is_legacy_gamma(crtc_state)) return 0; return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 954070255b4d..437339f5d098 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -213,6 +213,19 @@ static void pch_post_disable_crt(struct intel_encoder *encoder, intel_disable_crt(encoder, old_crtc_state, old_conn_state); } +static void hsw_disable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_crtc *crtc = old_crtc_state->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + static void hsw_post_disable_crt(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) @@ -225,6 +238,58 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, lpt_disable_iclkip(dev_priv); intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state); + + WARN_ON(!old_crtc_state->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); +} + +static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + +static void hsw_pre_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); +} + +static void hsw_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); + + intel_wait_for_vblank(dev_priv, pipe); + intel_wait_for_vblank(dev_priv, pipe); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void intel_enable_crt(struct intel_encoder *encoder, @@ -279,10 +344,25 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + return true; +} + +static bool pch_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ + 
pipe_config->has_pch_encoder = true; + + return true; +} + +static bool hsw_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (HAS_PCH_SPLIT(dev_priv)) - pipe_config->has_pch_encoder = true; + pipe_config->has_pch_encoder = true; /* LPT FDI RX only supports 8bpc. */ if (HAS_PCH_LPT(dev_priv)) { @@ -295,8 +375,7 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, } /* FDI must always be 2.7 GHz */ - if (HAS_DDI(dev_priv)) - pipe_config->port_clock = 135000 * 2; + pipe_config->port_clock = 135000 * 2; return true; } @@ -890,26 +969,33 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.power_domain = POWER_DOMAIN_PORT_CRT; - crt->base.compute_config = intel_crt_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { - crt->base.disable = pch_disable_crt; - crt->base.post_disable = pch_post_disable_crt; - } else { - crt->base.disable = intel_disable_crt; - } - crt->base.enable = intel_enable_crt; if (I915_HAS_HOTPLUG(dev_priv) && !dmi_check_system(intel_spurious_crt_detect)) crt->base.hpd_pin = HPD_CRT; + if (HAS_DDI(dev_priv)) { crt->base.port = PORT_E; crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; + crt->base.compute_config = hsw_crt_compute_config; + crt->base.pre_pll_enable = hsw_pre_pll_enable_crt; + crt->base.pre_enable = hsw_pre_enable_crt; + crt->base.enable = hsw_enable_crt; + crt->base.disable = hsw_disable_crt; crt->base.post_disable = hsw_post_disable_crt; } else { + if (HAS_PCH_SPLIT(dev_priv)) { + crt->base.compute_config = pch_crt_compute_config; + crt->base.disable = pch_disable_crt; + crt->base.post_disable = pch_post_disable_crt; + } else { + crt->base.compute_config = intel_crt_compute_config; + crt->base.disable = intel_disable_crt; + } crt->base.port = PORT_NONE; crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; + crt->base.enable = intel_enable_crt; } intel_connector->get_hw_state = intel_connector_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index cdfb624eb82d..da9de47562b8 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -52,10 +52,6 @@ MODULE_FIRMWARE(I915_CSR_SKL); MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) -#define FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" - - - #define CSR_MAX_FW_SIZE 0x2FFF #define CSR_DEFAULT_FW_OFFSET 0xFFFFFFFF @@ -216,7 +212,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) mask = DC_STATE_DEBUG_MASK_MEMORY_UP; - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_DEBUG_MASK_CORES; /* The below bit doesn't need to be cleared ever afterwards */ @@ -291,7 +287,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, css_header = (struct intel_css_header *)fw->data; if (sizeof(struct intel_css_header) != (css_header->header_len * 4)) { - DRM_ERROR("Firmware has wrong CSS header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong CSS header length " + "(%u bytes)\n", (css_header->header_len * 4)); return NULL; } @@ -315,7 +312,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, if (csr->version != required_version) { DRM_INFO("Refusing to load DMC firmware v%u.%u," - " please use v%u.%u [" FIRMWARE_URL "].\n", + " please use v%u.%u\n", 
CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version), CSR_VERSION_MAJOR(required_version), @@ -330,7 +327,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, &fw->data[readcount]; if (sizeof(struct intel_package_header) != (package_header->header_len * 4)) { - DRM_ERROR("Firmware has wrong package header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong package header length " + "(%u bytes)\n", (package_header->header_len * 4)); return NULL; } @@ -351,7 +349,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, dmc_offset = package_header->fw_info[i].offset; } if (dmc_offset == CSR_DEFAULT_FW_OFFSET) { - DRM_ERROR("Firmware not supported for %c stepping\n", + DRM_ERROR("DMC firmware not supported for %c stepping\n", si->stepping); return NULL; } @@ -360,7 +358,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* Extract dmc_header information. */ dmc_header = (struct intel_dmc_header *)&fw->data[readcount]; if (sizeof(struct intel_dmc_header) != (dmc_header->header_len)) { - DRM_ERROR("Firmware has wrong dmc header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong dmc header length " + "(%u bytes)\n", (dmc_header->header_len)); return NULL; } @@ -368,7 +367,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* Cache the dmc header info. */ if (dmc_header->mmio_count > ARRAY_SIZE(csr->mmioaddr)) { - DRM_ERROR("Firmware has wrong mmio count %u\n", + DRM_ERROR("DMC firmware has wrong mmio count %u\n", dmc_header->mmio_count); return NULL; } @@ -376,7 +375,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, for (i = 0; i < dmc_header->mmio_count; i++) { if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE || dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) { - DRM_ERROR(" Firmware has wrong mmio address 0x%x\n", + DRM_ERROR("DMC firmware has wrong mmio address 0x%x\n", dmc_header->mmioaddr[i]); return NULL; } @@ -387,7 +386,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* fw_size is in dwords, so multiplied by 4 to convert into bytes. */ nbytes = dmc_header->fw_size * 4; if (nbytes > CSR_MAX_FW_SIZE) { - DRM_ERROR("CSR firmware too big (%u) bytes\n", nbytes); + DRM_ERROR("DMC firmware too big (%u bytes)\n", nbytes); return NULL; } csr->dmc_fw_size = dmc_header->fw_size; @@ -425,9 +424,11 @@ static void csr_load_work_fn(struct work_struct *work) CSR_VERSION_MINOR(csr->version)); } else { dev_notice(dev_priv->drm.dev, - "Failed to load DMC firmware" - " [" FIRMWARE_URL "]," - " disabling runtime power management.\n"); + "Failed to load DMC firmware %s." 
+ " Disabling runtime power management.\n", + csr->fw_path); + dev_notice(dev_priv->drm.dev, "DMC firmware homepage: %s", + INTEL_UC_FIRMWARE_URL); } release_firmware(fw); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 93cbbcbbc193..933c18fd4258 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -305,35 +305,34 @@ struct bxt_ddi_buf_trans { u8 scale; /* scale value */ u8 enable; /* scale enable */ u8 deemphasis; - bool default_index; /* true if the entry represents default value */ }; static const struct bxt_ddi_buf_trans bxt_ddi_translations_dp[] = { /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, true }, /* 0: 400 0 */ - { 78, 0x9A, 0, 85, false }, /* 1: 400 3.5 */ - { 104, 0x9A, 0, 64, false }, /* 2: 400 6 */ - { 154, 0x9A, 0, 43, false }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, false }, /* 4: 600 0 */ - { 116, 0x9A, 0, 85, false }, /* 5: 600 3.5 */ - { 154, 0x9A, 0, 64, false }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, false }, /* 7: 800 0 */ - { 154, 0x9A, 0, 85, false }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, false }, /* 9: 1200 0 */ + { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ + { 78, 0x9A, 0, 85, }, /* 1: 400 3.5 */ + { 104, 0x9A, 0, 64, }, /* 2: 400 6 */ + { 154, 0x9A, 0, 43, }, /* 3: 400 9.5 */ + { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ + { 116, 0x9A, 0, 85, }, /* 5: 600 3.5 */ + { 154, 0x9A, 0, 64, }, /* 6: 600 6 */ + { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ + { 154, 0x9A, 0, 85, }, /* 8: 800 3.5 */ + { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ }; static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { /* Idx NT mV diff db */ - { 26, 0, 0, 128, false }, /* 0: 200 0 */ - { 38, 0, 0, 112, false }, /* 1: 200 1.5 */ - { 48, 0, 0, 96, false }, /* 2: 200 4 */ - { 54, 0, 0, 69, false }, /* 3: 200 6 */ - { 32, 0, 0, 128, false }, /* 4: 250 0 */ - { 48, 0, 0, 104, false }, /* 5: 250 1.5 */ - { 54, 0, 0, 85, false }, /* 6: 250 4 */ - { 43, 0, 0, 128, false }, /* 7: 300 0 */ - { 54, 0, 0, 101, false }, /* 8: 300 1.5 */ - { 48, 0, 0, 128, false }, /* 9: 300 0 */ + { 26, 0, 0, 128, }, /* 0: 200 0 */ + { 38, 0, 0, 112, }, /* 1: 200 1.5 */ + { 48, 0, 0, 96, }, /* 2: 200 4 */ + { 54, 0, 0, 69, }, /* 3: 200 6 */ + { 32, 0, 0, 128, }, /* 4: 250 0 */ + { 48, 0, 0, 104, }, /* 5: 250 1.5 */ + { 54, 0, 0, 85, }, /* 6: 250 4 */ + { 43, 0, 0, 128, }, /* 7: 300 0 */ + { 54, 0, 0, 101, }, /* 8: 300 1.5 */ + { 48, 0, 0, 128, }, /* 9: 300 0 */ }; /* BSpec has 2 recommended values - entries 0 and 8. 
@@ -341,16 +340,16 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { */ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, false }, /* 0: 400 0 */ - { 52, 0x9A, 0, 85, false }, /* 1: 400 3.5 */ - { 52, 0x9A, 0, 64, false }, /* 2: 400 6 */ - { 42, 0x9A, 0, 43, false }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, false }, /* 4: 600 0 */ - { 77, 0x9A, 0, 85, false }, /* 5: 600 3.5 */ - { 77, 0x9A, 0, 64, false }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, false }, /* 7: 800 0 */ - { 102, 0x9A, 0, 85, false }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ + { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ + { 52, 0x9A, 0, 85, }, /* 1: 400 3.5 */ + { 52, 0x9A, 0, 64, }, /* 2: 400 6 */ + { 42, 0x9A, 0, 43, }, /* 3: 400 9.5 */ + { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ + { 77, 0x9A, 0, 85, }, /* 5: 600 3.5 */ + { 77, 0x9A, 0, 64, }, /* 6: 600 6 */ + { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ + { 102, 0x9A, 0, 85, }, /* 8: 800 3.5 */ + { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ }; struct cnl_ddi_buf_trans { @@ -588,6 +587,120 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } +static int skl_buf_trans_num_entries(enum port port, int n_entries) +{ + /* Only DDIA and DDIE can select the 10th register with DP */ + if (port == PORT_A || port == PORT_E) + return min(n_entries, 10); + else + return min(n_entries, 9); +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, + enum port port, int *n_entries) +{ + if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + const struct ddi_buf_trans *ddi_translations = + kbl_get_buf_trans_dp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; + } else if (IS_SKYLAKE(dev_priv)) { + const struct ddi_buf_trans *ddi_translations = + skl_get_buf_trans_dp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, + enum port port, int *n_entries) +{ + if (IS_GEN9_BC(dev_priv)) { + const struct ddi_buf_trans *ddi_translations = + skl_get_buf_trans_edp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; + } else if (IS_BROADWELL(dev_priv)) { + return bdw_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_fdi); + return bdw_ddi_translations_fdi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_GEN9_BC(dev_priv)) { + return skl_get_buf_trans_hdmi(dev_priv, n_entries); + } 
else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + return bdw_ddi_translations_hdmi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + return hsw_ddi_translations_hdmi; + } + + *n_entries = 0; + return NULL; +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) +{ + *n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); + return bxt_ddi_translations_dp; +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) +{ + if (dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); + return bxt_ddi_translations_edp; + } + + return bxt_get_buf_trans_dp(dev_priv, n_entries); +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) +{ + *n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); + return bxt_ddi_translations_hdmi; +} + static const struct cnl_ddi_buf_trans * cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) { @@ -602,8 +715,10 @@ cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); return cnl_ddi_translations_hdmi_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } @@ -621,8 +736,10 @@ cnl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); return cnl_ddi_translations_dp_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } @@ -641,8 +758,10 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); return cnl_ddi_translations_edp_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } else { return cnl_get_buf_trans_dp(dev_priv, n_entries); @@ -651,92 +770,40 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) { - int n_hdmi_entries; - int hdmi_level; - int hdmi_default_entry; - - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + int n_entries, level, default_entry; - if (IS_GEN9_LP(dev_priv)) - return hdmi_level; + level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; if (IS_CANNONLAKE(dev_priv)) { - cnl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = n_hdmi_entries - 1; + cnl_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = n_entries - 1; + } else if (IS_GEN9_LP(dev_priv)) { + bxt_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = n_entries - 1; } else if (IS_GEN9_BC(dev_priv)) { - skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 7; } else if (IS_HASWELL(dev_priv)) { - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + 
default_entry = 6; } else { WARN(1, "ddi translation table missing\n"); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; + return 0; } /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; - - return hdmi_level; -} - -static const struct ddi_buf_trans * -intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - return kbl_get_buf_trans_dp(dev_priv, n_entries); - } else if (IS_SKYLAKE(dev_priv)) { - return skl_get_buf_trans_dp(dev_priv, n_entries); - } else if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - return bdw_ddi_translations_dp; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } - - *n_entries = 0; - return NULL; -} - -static const struct ddi_buf_trans * -intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_GEN9_BC(dev_priv)) { - return skl_get_buf_trans_edp(dev_priv, n_entries); - } else if (IS_BROADWELL(dev_priv)) { - return bdw_get_buf_trans_edp(dev_priv, n_entries); - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } - - *n_entries = 0; - return NULL; -} + if (level == HDMI_LEVEL_SHIFT_UNKNOWN || level >= n_entries) + level = default_entry; -static const struct ddi_buf_trans * -intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); - return hsw_ddi_translations_fdi; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); - return hsw_ddi_translations_fdi; - } + if (WARN_ON_ONCE(n_entries == 0)) + return 0; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; - *n_entries = 0; - return NULL; + return level; } /* @@ -754,11 +821,11 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) switch (encoder->type) { case INTEL_OUTPUT_EDP: - ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); break; case INTEL_OUTPUT_DP: - ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); break; case INTEL_OUTPUT_ANALOG: @@ -770,16 +837,10 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) return; } - if (IS_GEN9_BC(dev_priv)) { - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - - if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && - port != PORT_A && port != PORT_E && - n_entries > 9)) - n_entries = 9; - } + /* If we're boosting the current, set bit 31 of trans1 */ + if (IS_GEN9_BC(dev_priv) && + dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), @@ -794,39 +855,32 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) * values in advance. This function programs the correct values for * HDMI/DVI use cases. 
*/ -static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) +static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, + int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int n_hdmi_entries, hdmi_level; + int n_entries; enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_hdmi; + const struct ddi_buf_trans *ddi_translations; - hdmi_level = intel_ddi_hdmi_level(dev_priv, port); + ddi_translations = intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); - if (IS_GEN9_BC(dev_priv)) { - ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_hdmi = hsw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - } + /* If we're boosting the current, set bit 31 of trans1 */ + if (IS_GEN9_BC(dev_priv) && + dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; /* Entry 9 is for HDMI: */ I915_WRITE(DDI_BUF_TRANS_LO(port, 9), - ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); + ddi_translations[level].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, 9), - ddi_translations_hdmi[hdmi_level].trans2); + ddi_translations[level].trans2); } static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, @@ -1102,14 +1156,14 @@ static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv, } static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, - uint32_t dpll) + enum intel_dpll_id pll_id) { i915_reg_t cfgcr1_reg, cfgcr2_reg; uint32_t cfgcr1_val, cfgcr2_val; uint32_t p0, p1, p2, dco_freq; - cfgcr1_reg = DPLL_CFGCR1(dpll); - cfgcr2_reg = DPLL_CFGCR2(dpll); + cfgcr1_reg = DPLL_CFGCR1(pll_id); + cfgcr2_reg = DPLL_CFGCR2(pll_id); cfgcr1_val = I915_READ(cfgcr1_reg); cfgcr2_val = I915_READ(cfgcr2_reg); @@ -1162,7 +1216,7 @@ static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, } static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, - uint32_t pll_id) + enum intel_dpll_id pll_id) { uint32_t cfgcr0, cfgcr1; uint32_t p0, p1, p2, dco_freq, ref_clock; @@ -1214,6 +1268,9 @@ static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, dco_freq += (((cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> DPLL_CFGCR0_DCO_FRACTION_SHIFT) * ref_clock) / 0x8000; + if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0)) + return 0; + return dco_freq / (p0 * p1 * p2 * 5); } @@ -1246,7 +1303,8 @@ static void cnl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t cfgcr0, pll_id; + uint32_t cfgcr0; + enum intel_dpll_id pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); @@ -1299,17 +1357,18 @@ static void skl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int 
link_clock = 0; - uint32_t dpll_ctl1, dpll; + uint32_t dpll_ctl1; + enum intel_dpll_id pll_id; - dpll = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); + pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); dpll_ctl1 = I915_READ(DPLL_CTRL1); - if (dpll_ctl1 & DPLL_CTRL1_HDMI_MODE(dpll)) { - link_clock = skl_calc_wrpll_link(dev_priv, dpll); + if (dpll_ctl1 & DPLL_CTRL1_HDMI_MODE(pll_id)) { + link_clock = skl_calc_wrpll_link(dev_priv, pll_id); } else { - link_clock = dpll_ctl1 & DPLL_CTRL1_LINK_RATE_MASK(dpll); - link_clock >>= DPLL_CTRL1_LINK_RATE_SHIFT(dpll); + link_clock = dpll_ctl1 & DPLL_CTRL1_LINK_RATE_MASK(pll_id); + link_clock >>= DPLL_CTRL1_LINK_RATE_SHIFT(pll_id); switch (link_clock) { case DPLL_CTRL1_LINK_RATE_810: @@ -1390,17 +1449,17 @@ static void hsw_ddi_clock_get(struct intel_encoder *encoder, } static int bxt_calc_pll_link(struct drm_i915_private *dev_priv, - enum intel_dpll_id dpll) + enum intel_dpll_id pll_id) { struct intel_shared_dpll *pll; struct intel_dpll_hw_state *state; struct dpll clock; /* For DDI ports we always use a shared PLL. */ - if (WARN_ON(dpll == DPLL_ID_PRIVATE)) + if (WARN_ON(pll_id == DPLL_ID_PRIVATE)) return 0; - pll = &dev_priv->shared_dplls[dpll]; + pll = &dev_priv->shared_dplls[pll_id]; state = &pll->state.hw_state; clock.m1 = 2; @@ -1419,9 +1478,9 @@ static void bxt_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); - uint32_t dpll = port; + enum intel_dpll_id pll_id = port; - pipe_config->port_clock = bxt_calc_pll_link(dev_priv, dpll); + pipe_config->port_clock = bxt_calc_pll_link(dev_priv, pll_id); ddi_dotclock_get(pipe_config); } @@ -1713,7 +1772,8 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, out: if (ret && IS_GEN9_LP(dev_priv)) { tmp = I915_READ(BXT_PHY_CTL(port)); - if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK | + if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | + BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) DRM_ERROR("Port %c enabled but PHY powered down? 
" "(PHY_CTL %08x)\n", port_name(port), tmp); @@ -1772,54 +1832,36 @@ static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); } -static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) +static void skl_ddi_set_iboost(struct intel_encoder *encoder, + int level, enum intel_output_type type) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum port port = intel_dig_port->port; - int type = encoder->type; - const struct ddi_buf_trans *ddi_translations; uint8_t iboost; - uint8_t dp_iboost, hdmi_iboost; - int n_entries; - /* VBT may override standard boost values */ - dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; - hdmi_iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; + if (type == INTEL_OUTPUT_HDMI) + iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; + else + iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; - if (type == INTEL_OUTPUT_DP) { - if (dp_iboost) { - iboost = dp_iboost; - } else { - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) - ddi_translations = kbl_get_buf_trans_dp(dev_priv, - &n_entries); - else - ddi_translations = skl_get_buf_trans_dp(dev_priv, - &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else if (type == INTEL_OUTPUT_EDP) { - if (dp_iboost) { - iboost = dp_iboost; - } else { - ddi_translations = skl_get_buf_trans_edp(dev_priv, &n_entries); + if (iboost == 0) { + const struct ddi_buf_trans *ddi_translations; + int n_entries; - if (WARN_ON(port != PORT_A && - port != PORT_E && n_entries > 9)) - n_entries = 9; + if (type == INTEL_OUTPUT_HDMI) + ddi_translations = intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + else if (type == INTEL_OUTPUT_EDP) + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); + else + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else if (type == INTEL_OUTPUT_HDMI) { - if (hdmi_iboost) { - iboost = hdmi_iboost; - } else { - ddi_translations = skl_get_buf_trans_hdmi(dev_priv, &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else { - return; + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; + + iboost = ddi_translations[level].i_boost; } /* Make sure that the requested I_boost is valid */ @@ -1834,38 +1876,25 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } -static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, + int level, enum intel_output_type type) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct bxt_ddi_buf_trans *ddi_translations; - u32 n_entries, i; - - if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); - ddi_translations = bxt_ddi_translations_edp; - } else if (type == INTEL_OUTPUT_DP - || type == INTEL_OUTPUT_EDP) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); - ddi_translations = bxt_ddi_translations_dp; - } else if (type == INTEL_OUTPUT_HDMI) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); - ddi_translations = bxt_ddi_translations_hdmi; - } else { - DRM_DEBUG_KMS("Vswing programming not done for encoder %d\n", 
- type); - return; - } + enum port port = encoder->port; + int n_entries; - /* Check if default value has to be used */ - if (level >= n_entries || - (type == INTEL_OUTPUT_HDMI && level == HDMI_LEVEL_SHIFT_UNKNOWN)) { - for (i = 0; i < n_entries; i++) { - if (ddi_translations[i].default_index) { - level = i; - break; - } - } - } + if (type == INTEL_OUTPUT_HDMI) + ddi_translations = bxt_get_buf_trans_hdmi(dev_priv, &n_entries); + else if (type == INTEL_OUTPUT_EDP) + ddi_translations = bxt_get_buf_trans_edp(dev_priv, &n_entries); + else + ddi_translations = bxt_get_buf_trans_dp(dev_priv, &n_entries); + + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; bxt_ddi_phy_set_signal_level(dev_priv, port, ddi_translations[level].margin, @@ -1877,6 +1906,7 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; int n_entries; if (IS_CANNONLAKE(dev_priv)) { @@ -1884,11 +1914,16 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) cnl_get_buf_trans_edp(dev_priv, &n_entries); else cnl_get_buf_trans_dp(dev_priv, &n_entries); + } else if (IS_GEN9_LP(dev_priv)) { + if (encoder->type == INTEL_OUTPUT_EDP) + bxt_get_buf_trans_edp(dev_priv, &n_entries); + else + bxt_get_buf_trans_dp(dev_priv, &n_entries); } else { if (encoder->type == INTEL_OUTPUT_EDP) - intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); + intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); else - intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); + intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); } if (WARN_ON(n_entries < 1)) @@ -1900,28 +1935,26 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) DP_TRAIN_VOLTAGE_SWING_MASK; } -static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void cnl_ddi_vswing_program(struct intel_encoder *encoder, + int level, enum intel_output_type type) { - const struct cnl_ddi_buf_trans *ddi_translations = NULL; - u32 n_entries, val; - int ln; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = intel_ddi_get_encoder_port(encoder); + const struct cnl_ddi_buf_trans *ddi_translations; + int n_entries, ln; + u32 val; - if (type == INTEL_OUTPUT_HDMI) { + if (type == INTEL_OUTPUT_HDMI) ddi_translations = cnl_get_buf_trans_hdmi(dev_priv, &n_entries); - } else if (type == INTEL_OUTPUT_DP) { - ddi_translations = cnl_get_buf_trans_dp(dev_priv, &n_entries); - } else if (type == INTEL_OUTPUT_EDP) { + else if (type == INTEL_OUTPUT_EDP) ddi_translations = cnl_get_buf_trans_edp(dev_priv, &n_entries); - } + else + ddi_translations = cnl_get_buf_trans_dp(dev_priv, &n_entries); - if (WARN_ON(ddi_translations == NULL)) + if (WARN_ON_ONCE(!ddi_translations)) return; - - if (level >= n_entries) { - DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + if (WARN_ON_ONCE(level >= n_entries)) level = n_entries - 1; - } /* Set PORT_TX_DW5 Scaling Mode Sel to 010b. 
*/ val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); @@ -1966,26 +1999,22 @@ static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, I915_WRITE(CNL_PORT_TX_DW7_GRP(port), val); } -static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) +static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, + int level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = intel_ddi_get_encoder_port(encoder); - int type = encoder->type; - int width = 0; - int rate = 0; + int width, rate, ln; u32 val; - int ln = 0; - if ((intel_dp) && (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP)) { - width = intel_dp->lane_count; - rate = intel_dp->link_rate; - } else if (type == INTEL_OUTPUT_HDMI) { + if (type == INTEL_OUTPUT_HDMI) { width = 4; - /* Rate is always < than 6GHz for HDMI */ + rate = 0; /* Rate is always < than 6GHz for HDMI */ } else { - MISSING_CASE(type); - return; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + width = intel_dp->lane_count; + rate = intel_dp->link_rate; } /* @@ -1994,7 +2023,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) * else clear to 0b. */ val = I915_READ(CNL_PORT_PCS_DW1_LN0(port)); - if (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP) + if (type != INTEL_OUTPUT_HDMI) val |= COMMON_KEEPER_EN; else val &= ~COMMON_KEEPER_EN; @@ -2029,7 +2058,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); /* 5. Program swing and de-emphasis */ - cnl_ddi_vswing_program(dev_priv, level, port, type); + cnl_ddi_vswing_program(encoder, level, type); /* 6. Set training enable to trigger update */ val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); @@ -2066,13 +2095,12 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); struct intel_encoder *encoder = &dport->base; - enum port port = dport->port; - u32 level = intel_ddi_dp_level(intel_dp); + int level = intel_ddi_dp_level(intel_dp); if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level); + cnl_ddi_vswing_sequence(encoder, level, encoder->type); else - bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); + bxt_ddi_vswing_sequence(encoder, level, encoder->type); return 0; } @@ -2082,10 +2110,10 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); struct intel_encoder *encoder = &dport->base; - uint32_t level = intel_ddi_dp_level(intel_dp); + int level = intel_ddi_dp_level(intel_dp); if (IS_GEN9_BC(dev_priv)) - skl_ddi_set_iboost(encoder, level); + skl_ddi_set_iboost(encoder, level, encoder->type); return DDI_BUF_TRANS_SELECT(level); } @@ -2112,8 +2140,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, * register writes. 
*/ val = I915_READ(DPCLKA_CFGCR0); - val &= ~(DPCLKA_CFGCR0_DDI_CLK_OFF(port) | - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port)); + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); I915_WRITE(DPCLKA_CFGCR0, val); } else if (IS_GEN9_BC(dev_priv)) { /* DDI -> PLL mapping */ @@ -2131,47 +2158,61 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } } +static void intel_ddi_clk_disable(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = intel_ddi_get_encoder_port(encoder); + + if (IS_CANNONLAKE(dev_priv)) + I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + else if (IS_GEN9_BC(dev_priv)) + I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | + DPLL_CTRL2_DDI_CLK_OFF(port)); + else if (INTEL_GEN(dev_priv) < 9) + I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); +} + static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, - int link_rate, uint32_t lane_count, - struct intel_shared_dpll *pll, - bool link_mst) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - uint32_t level = intel_ddi_dp_level(intel_dp); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); - WARN_ON(link_mst && (port == PORT_A || port == PORT_E)); + WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); - intel_dp_set_link_params(intel_dp, link_rate, lane_count, - link_mst); - if (encoder->type == INTEL_OUTPUT_EDP) - intel_edp_panel_on(intel_dp); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count, is_mst); - intel_ddi_clk_select(encoder, pll); + intel_edp_panel_on(intel_dp); + + intel_ddi_clk_select(encoder, crtc_state->shared_dpll); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level); + cnl_ddi_vswing_sequence(encoder, level, encoder->type); else if (IS_GEN9_LP(dev_priv)) - bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); + bxt_ddi_vswing_sequence(encoder, level, encoder->type); else intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, - bool has_infoframe, const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state, - const struct intel_shared_dpll *pll) + const struct drm_connector_state *conn_state) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; @@ -2181,67 +2222,49 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); - intel_ddi_clk_select(encoder, pll); + intel_ddi_clk_select(encoder, crtc_state->shared_dpll); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level); + 
cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else if (IS_GEN9_LP(dev_priv)) - bxt_ddi_vswing_sequence(dev_priv, level, port, - INTEL_OUTPUT_HDMI); + bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else - intel_prepare_hdmi_ddi_buffers(encoder); + intel_prepare_hdmi_ddi_buffers(encoder, level); if (IS_GEN9_BC(dev_priv)) - skl_ddi_set_iboost(encoder, level); + skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); intel_dig_port->set_infoframes(&encoder->base, - has_infoframe, + crtc_state->has_infoframe, crtc_state, conn_state); } static void intel_ddi_pre_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - int type = encoder->type; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - intel_ddi_pre_enable_dp(encoder, - pipe_config->port_clock, - pipe_config->lane_count, - pipe_config->shared_dpll, - intel_crtc_has_type(pipe_config, - INTEL_OUTPUT_DP_MST)); - } - if (type == INTEL_OUTPUT_HDMI) { - intel_ddi_pre_enable_hdmi(encoder, - pipe_config->has_infoframe, - pipe_config, conn_state, - pipe_config->shared_dpll); - } + WARN_ON(crtc_state->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state); + else + intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state); } -static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) +static void intel_disable_ddi_buf(struct intel_encoder *encoder) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - int type = intel_encoder->type; - uint32_t val; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = intel_ddi_get_encoder_port(encoder); bool wait = false; - - /* old_crtc_state and old_conn_state are NULL when called from DP_MST */ - - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - } + u32 val; val = I915_READ(DDI_BUF_CTL(port)); if (val & DDI_BUF_CTL_ENABLE) { @@ -2257,36 +2280,75 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, if (wait) intel_wait_ddi_buf_idle(dev_priv, port); +} - if (type == INTEL_OUTPUT_HDMI) { - dig_port->set_infoframes(encoder, false, - old_crtc_state, old_conn_state); - } +static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_dp *intel_dp = &dig_port->dp; + /* + * old_crtc_state and old_conn_state are NULL when called from + * DP_MST. The main connector associated with this port is never + * bound to a crtc for MST. 
+ */ + bool is_mst = !old_crtc_state; - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + /* + * Power down sink before disabling the port, otherwise we end + * up getting interrupts from the sink on detecting link loss. + */ + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - intel_edp_panel_vdd_on(intel_dp); - intel_edp_panel_off(intel_dp); - } + intel_disable_ddi_buf(encoder); - if (dig_port) - intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + intel_edp_panel_vdd_on(intel_dp); + intel_edp_panel_off(intel_dp); - if (IS_CANNONLAKE(dev_priv)) - I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); - else if (IS_GEN9_BC(dev_priv)) - I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | - DPLL_CTRL2_DDI_CLK_OFF(port))); - else if (INTEL_GEN(dev_priv) < 9) - I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); - if (type == INTEL_OUTPUT_HDMI) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + intel_ddi_clk_disable(encoder); +} - intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); - } +static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_hdmi *intel_hdmi = &dig_port->hdmi; + + intel_disable_ddi_buf(encoder); + + dig_port->set_infoframes(&encoder->base, false, + old_crtc_state, old_conn_state); + + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + + intel_ddi_clk_disable(encoder); + + intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); +} + +static void intel_ddi_post_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + /* + * old_crtc_state and old_conn_state are NULL when called from + * DP_MST. The main connector associated with this port is never + * bound to a crtc for MST. 
+ */ + if (old_crtc_state && + intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + intel_ddi_post_disable_hdmi(encoder, + old_crtc_state, old_conn_state); + else + intel_ddi_post_disable_dp(encoder, + old_crtc_state, old_conn_state); } void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, @@ -2306,7 +2368,8 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, val &= ~FDI_RX_ENABLE; I915_WRITE(FDI_RX_CTL(PIPE_A), val); - intel_ddi_post_disable(encoder, old_crtc_state, old_conn_state); + intel_disable_ddi_buf(encoder); + intel_ddi_clk_disable(encoder); val = I915_READ(FDI_RX_MISC(PIPE_A)); val &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK); @@ -2322,70 +2385,93 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, I915_WRITE(FDI_RX_CTL(PIPE_A), val); } -static void intel_enable_ddi(struct intel_encoder *intel_encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) +static void intel_enable_ddi_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - int type = intel_encoder->type; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + enum port port = intel_ddi_get_encoder_port(encoder); - if (type == INTEL_OUTPUT_HDMI) { - struct intel_digital_port *intel_dig_port = - enc_to_dig_port(encoder); - bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio; - bool scrambling = pipe_config->hdmi_scrambling; - - intel_hdmi_handle_sink_scrambling(intel_encoder, - conn_state->connector, - clock_ratio, scrambling); - - /* In HDMI/DVI mode, the port width, and swing/emphasis values - * are ignored so nothing special needs to be done besides - * enabling the port. - */ - I915_WRITE(DDI_BUF_CTL(port), - intel_dig_port->saved_port_bits | - DDI_BUF_CTL_ENABLE); - } else if (type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + if (port == PORT_A && INTEL_GEN(dev_priv) < 9) + intel_dp_stop_link_train(intel_dp); - if (port == PORT_A && INTEL_GEN(dev_priv) < 9) - intel_dp_stop_link_train(intel_dp); + intel_edp_backlight_on(crtc_state, conn_state); + intel_psr_enable(intel_dp, crtc_state); + intel_edp_drrs_enable(intel_dp, crtc_state); - intel_edp_backlight_on(pipe_config, conn_state); - intel_psr_enable(intel_dp, pipe_config); - intel_edp_drrs_enable(intel_dp, pipe_config); - } + if (crtc_state->has_audio) + intel_audio_codec_enable(encoder, crtc_state, conn_state); +} - if (pipe_config->has_audio) - intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); +static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum port port = intel_ddi_get_encoder_port(encoder); + + intel_hdmi_handle_sink_scrambling(encoder, + conn_state->connector, + crtc_state->hdmi_high_tmds_clock_ratio, + crtc_state->hdmi_scrambling); + + /* In HDMI/DVI mode, the port width, and swing/emphasis values + * are ignored so nothing special needs to be done besides + * enabling the port. 
+ */ + I915_WRITE(DDI_BUF_CTL(port), + dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE); + + if (crtc_state->has_audio) + intel_audio_codec_enable(encoder, crtc_state, conn_state); } -static void intel_disable_ddi(struct intel_encoder *intel_encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) +static void intel_enable_ddi(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - int type = intel_encoder->type; + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + intel_enable_ddi_hdmi(encoder, crtc_state, conn_state); + else + intel_enable_ddi_dp(encoder, crtc_state, conn_state); +} + +static void intel_disable_ddi_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); if (old_crtc_state->has_audio) - intel_audio_codec_disable(intel_encoder); + intel_audio_codec_disable(encoder); - if (type == INTEL_OUTPUT_HDMI) { - intel_hdmi_handle_sink_scrambling(intel_encoder, - old_conn_state->connector, - false, false); - } + intel_edp_drrs_disable(intel_dp, old_crtc_state); + intel_psr_disable(intel_dp, old_crtc_state); + intel_edp_backlight_off(old_conn_state); +} - if (type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); +static void intel_disable_ddi_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (old_crtc_state->has_audio) + intel_audio_codec_disable(encoder); - intel_edp_drrs_disable(intel_dp, old_crtc_state); - intel_psr_disable(intel_dp, old_crtc_state); - intel_edp_backlight_off(old_conn_state); - } + intel_hdmi_handle_sink_scrambling(encoder, + old_conn_state->connector, + false, false); +} + +static void intel_disable_ddi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + intel_disable_ddi_hdmi(encoder, old_crtc_state, old_conn_state); + else + intel_disable_ddi_dp(encoder, old_crtc_state, old_conn_state); } static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 615c58e48613..878acc432a4b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1539,7 +1539,7 @@ static void chv_enable_pll(struct intel_crtc *crtc, * DPLLCMD is AWOL. Use chicken bits to propagate * the value from DPLLBMD to either pipe B or C. */ - I915_WRITE(CBR4_VLV, pipe == PIPE_B ? 
CBR_DPLLBMD_PIPE_B : CBR_DPLLBMD_PIPE_C); + I915_WRITE(CBR4_VLV, CBR_DPLLBMD_PIPE(pipe)); I915_WRITE(DPLL_MD(PIPE_B), pipe_config->dpll_hw_state.dpll_md); I915_WRITE(CBR4_VLV, 0); dev_priv->chv_dpll_md[pipe] = pipe_config->dpll_hw_state.dpll_md; @@ -1568,11 +1568,12 @@ static int intel_num_dvo_pipes(struct drm_i915_private *dev_priv) return count; } -static void i9xx_enable_pll(struct intel_crtc *crtc) +static void i9xx_enable_pll(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); i915_reg_t reg = DPLL(crtc->pipe); - u32 dpll = crtc->config->dpll_hw_state.dpll; + u32 dpll = crtc_state->dpll_hw_state.dpll; int i; assert_pipe_disabled(dev_priv, crtc->pipe); @@ -1609,7 +1610,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE(DPLL_MD(crtc->pipe), - crtc->config->dpll_hw_state.dpll_md); + crtc_state->dpll_hw_state.dpll_md); } else { /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. @@ -1627,15 +1628,6 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) } } -/** - * i9xx_disable_pll - disable a PLL - * @dev_priv: i915 private structure - * @pipe: pipe PLL to disable - * - * Disable the PLL for @pipe, making sure the pipe is off first. - * - * Note! This is for pre-ILK only. - */ static void i9xx_disable_pll(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -2219,8 +2211,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) * something and try to run the system in a "less than optimal" * mode that matches the user configuration. */ - if (i915_vma_get_fence(vma) == 0) - i915_vma_pin_fence(vma); + i915_vma_pin_fence(vma); } i915_vma_get(vma); @@ -2856,7 +2847,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, if (intel_plane_ggtt_offset(state) == plane_config->base) { fb = c->primary->fb; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); goto valid_fb; } } @@ -2887,7 +2878,7 @@ valid_fb: intel_crtc->pipe, PTR_ERR(intel_state->vma)); intel_state->vma = NULL; - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); return; } @@ -2908,7 +2899,7 @@ valid_fb: if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; @@ -3298,7 +3289,6 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(primary->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); const struct drm_framebuffer *fb = plane_state->base.fb; enum plane plane = primary->plane; u32 linear_offset; @@ -3307,16 +3297,14 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, int x = plane_state->main.x; int y = plane_state->main.y; unsigned long irqflags; + u32 dspaddr_offset; linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) - crtc->dspaddr_offset = plane_state->main.offset; + dspaddr_offset = plane_state->main.offset; else - crtc->dspaddr_offset = linear_offset; - - crtc->adjusted_x = x; - crtc->adjusted_y = y; + dspaddr_offset = linear_offset; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3342,18 +3330,18 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, if (IS_HASWELL(dev_priv) || 
IS_BROADWELL(dev_priv)) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); + dspaddr_offset); I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); } else if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); + dspaddr_offset); I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE_FW(DSPLINOFF(plane), linear_offset); } else { I915_WRITE_FW(DSPADDR(plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); + dspaddr_offset); } POSTING_READ_FW(reg); @@ -3553,100 +3541,6 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, return plane_ctl; } -static void skylake_update_primary_plane(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id plane_id = plane->id; - enum pipe pipe = plane->pipe; - u32 plane_ctl = plane_state->ctl; - unsigned int rotation = plane_state->base.rotation; - u32 stride = skl_plane_stride(fb, 0, rotation); - u32 aux_stride = skl_plane_stride(fb, 1, rotation); - u32 surf_addr = plane_state->main.offset; - int scaler_id = plane_state->scaler_id; - int src_x = plane_state->main.x; - int src_y = plane_state->main.y; - int src_w = drm_rect_width(&plane_state->base.src) >> 16; - int src_h = drm_rect_height(&plane_state->base.src) >> 16; - int dst_x = plane_state->base.dst.x1; - int dst_y = plane_state->base.dst.y1; - int dst_w = drm_rect_width(&plane_state->base.dst); - int dst_h = drm_rect_height(&plane_state->base.dst); - unsigned long irqflags; - - /* Sizes are 0 based */ - src_w--; - src_h--; - dst_w--; - dst_h--; - - crtc->dspaddr_offset = surf_addr; - - crtc->adjusted_x = src_x; - crtc->adjusted_y = src_y; - - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), - PLANE_COLOR_PIPE_GAMMA_ENABLE | - PLANE_COLOR_PIPE_CSC_ENABLE | - PLANE_COLOR_PLANE_GAMMA_DISABLE); - } - - I915_WRITE_FW(PLANE_CTL(pipe, plane_id), plane_ctl); - I915_WRITE_FW(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x); - I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); - I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); - I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), - (plane_state->aux.offset - surf_addr) | aux_stride); - I915_WRITE_FW(PLANE_AUX_OFFSET(pipe, plane_id), - (plane_state->aux.y << 16) | plane_state->aux.x); - - if (scaler_id >= 0) { - uint32_t ps_ctrl = 0; - - WARN_ON(!dst_w || !dst_h); - ps_ctrl = PS_SCALER_EN | PS_PLANE_SEL(plane_id) | - crtc_state->scaler_state.scalers[scaler_id].mode; - I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), ps_ctrl); - I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); - I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (dst_x << 16) | dst_y); - I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), (dst_w << 16) | dst_h); - I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); - } else { - I915_WRITE_FW(PLANE_POS(pipe, plane_id), (dst_y << 16) | dst_x); - } - - I915_WRITE_FW(PLANE_SURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + surf_addr); - - POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); -} - -static void skylake_disable_primary_plane(struct intel_plane *primary, - 
struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(primary->base.dev); - enum plane_id plane_id = primary->id; - enum pipe pipe = primary->pipe; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - I915_WRITE_FW(PLANE_CTL(pipe, plane_id), 0); - I915_WRITE_FW(PLANE_SURF(pipe, plane_id), 0); - POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); -} - static int __intel_display_resume(struct drm_device *dev, struct drm_atomic_state *state, @@ -3782,6 +3676,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) intel_pps_unlock_regs_wa(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) @@ -4955,10 +4850,10 @@ void hsw_enable_ips(struct intel_crtc *crtc) assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, IPS_ENABLE | IPS_PCODE_CONTROL)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the * mailbox." Moreover, the mailbox may return a bogus state, @@ -4988,9 +4883,9 @@ void hsw_disable_ips(struct intel_crtc *crtc) assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* wait for pcode to finish disabling IPS, which may take up to 42ms */ if (intel_wait_for_register(dev_priv, IPS_CTL, IPS_ENABLE, 0, @@ -5459,6 +5354,20 @@ static bool hsw_crtc_supports_ips(struct intel_crtc *crtc) return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A; } +static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, + enum pipe pipe, bool apply) +{ + u32 val = I915_READ(CLKGATE_DIS_PSL(pipe)); + u32 mask = DPF_GATING_DIS | DPF_RAM_GATING_DIS | DPFR_GATING_DIS; + + if (apply) + val |= mask; + else + val &= ~mask; + + I915_WRITE(CLKGATE_DIS_PSL(pipe), val); +} + static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { @@ -5469,13 +5378,11 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); + bool psl_clkgate_wa; if (WARN_ON(intel_crtc->active)) return; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); if (intel_crtc->config->shared_dpll) @@ -5509,19 +5416,17 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_crtc->active = true; - if (intel_crtc->config->has_pch_encoder) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - else - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_encoders_pre_enable(crtc, pipe_config, old_state); - if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(intel_crtc, pipe_config); - if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(pipe_config); + /* Display WA #1180: 
WaDisableScalarClockGating: glk, cnl */ + psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && + intel_crtc->config->pch_pfit.enabled; + if (psl_clkgate_wa) + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); + if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); else @@ -5555,11 +5460,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_enable(crtc, pipe_config, old_state); - if (intel_crtc->config->has_pch_encoder) { - intel_wait_for_vblank(dev_priv, pipe); + if (psl_clkgate_wa) { intel_wait_for_vblank(dev_priv, pipe); - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false); } /* If we change the relative order between pipe/planes enabling, we need @@ -5655,9 +5558,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_disable(crtc, old_crtc_state, old_state); drm_crtc_vblank_off(crtc); @@ -5682,9 +5582,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); - - if (old_crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -5894,7 +5791,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); - i9xx_enable_pll(intel_crtc); + i9xx_enable_pll(intel_crtc, pipe_config); i9xx_pfit_enable(intel_crtc); @@ -6146,6 +6043,19 @@ struct intel_connector *intel_connector_alloc(void) return connector; } +/* + * Free the bits allocated by intel_connector_alloc. + * This should only be used after intel_connector_alloc has returned + * successfully, and before drm_connector_init returns successfully. + * Otherwise the destroy callbacks for the connector and the state should + * take care of proper cleanup/free + */ +void intel_connector_free(struct intel_connector *connector) +{ + kfree(to_intel_digital_connector_state(connector->base.state)); + kfree(connector); +} + /* Simple connector->get_hw_state implementation for encoders that support only * one connector and no cloning and hence the encoder state determines the state * of the connector. 
*/ @@ -6529,11 +6439,9 @@ static void i9xx_update_pll_dividers(struct intel_crtc *crtc, crtc_state->dpll_hw_state.fp0 = fp; - crtc->lowfreq_avail = false; if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) && reduced_clock) { crtc_state->dpll_hw_state.fp1 = fp2; - crtc->lowfreq_avail = true; } else { crtc_state->dpll_hw_state.fp1 = fp; } @@ -7228,15 +7136,6 @@ static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc) } } - if (HAS_PIPE_CXSR(dev_priv)) { - if (intel_crtc->lowfreq_avail) { - DRM_DEBUG_KMS("enabling CxSR downclocking\n"); - pipeconf |= PIPECONF_CXSR_DOWNCLOCK; - } else { - DRM_DEBUG_KMS("disabling CxSR downclocking\n"); - } - } - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) { if (INTEL_GEN(dev_priv) < 4 || intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_SDVO)) @@ -8372,8 +8271,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); - crtc->lowfreq_avail = false; - /* CPU eDP is the only output that doesn't need a PCH PLL of its own. */ if (!crtc_state->has_pch_encoder) return 0; @@ -8846,11 +8743,11 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) { if (IS_HASWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val)) DRM_DEBUG_KMS("Failed to write to D_COMP\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } else { I915_WRITE(D_COMP_BDW, val); POSTING_READ(D_COMP_BDW); @@ -9032,8 +8929,6 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc, } } - crtc->lowfreq_avail = false; - return 0; } @@ -9853,7 +9748,7 @@ mode_fits_in_fbdev(struct drm_device *dev, if (obj->base.size < mode->vdisplay * fb->pitches[0]) return NULL; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); return fb; #else return NULL; @@ -10034,7 +9929,7 @@ found: if (ret) goto fail; - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) @@ -10245,58 +10140,44 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc, &pipe_config->fdi_m_n); } -/** Returns the currently programmed mode of the given pipe. */ -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc) +/* Returns the currently programmed mode of the given encoder. 
*/ +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc_state *crtc_state; struct drm_display_mode *mode; - struct intel_crtc_state *pipe_config; - int htot = I915_READ(HTOTAL(cpu_transcoder)); - int hsync = I915_READ(HSYNC(cpu_transcoder)); - int vtot = I915_READ(VTOTAL(cpu_transcoder)); - int vsync = I915_READ(VSYNC(cpu_transcoder)); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc; + enum pipe pipe; + + if (!encoder->get_hw_state(encoder, &pipe)) + return NULL; + + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); mode = kzalloc(sizeof(*mode), GFP_KERNEL); if (!mode) return NULL; - pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); - if (!pipe_config) { + crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); + if (!crtc_state) { kfree(mode); return NULL; } - /* - * Construct a pipe_config sufficient for getting the clock info - * back out of crtc_clock_get. - * - * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need - * to use a real value here instead. - */ - pipe_config->cpu_transcoder = (enum transcoder) pipe; - pipe_config->pixel_multiplier = 1; - pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(pipe)); - pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(pipe)); - pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe)); - i9xx_crtc_clock_get(intel_crtc, pipe_config); - - mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; - mode->hdisplay = (htot & 0xffff) + 1; - mode->htotal = ((htot & 0xffff0000) >> 16) + 1; - mode->hsync_start = (hsync & 0xffff) + 1; - mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1; - mode->vdisplay = (vtot & 0xffff) + 1; - mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1; - mode->vsync_start = (vsync & 0xffff) + 1; - mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1; + crtc_state->base.crtc = &crtc->base; - drm_mode_set_name(mode); + if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) { + kfree(crtc_state); + kfree(mode); + return NULL; + } - kfree(pipe_config); + encoder->get_config(encoder, crtc_state); + + intel_mode_from_pipe_config(mode, crtc_state); + + kfree(crtc_state); return mode; } @@ -10683,6 +10564,52 @@ intel_dump_m_n_config(struct intel_crtc_state *pipe_config, char *id, m_n->link_m, m_n->link_n, m_n->tu); } +#define OUTPUT_TYPE(x) [INTEL_OUTPUT_ ## x] = #x + +static const char * const output_type_str[] = { + OUTPUT_TYPE(UNUSED), + OUTPUT_TYPE(ANALOG), + OUTPUT_TYPE(DVO), + OUTPUT_TYPE(SDVO), + OUTPUT_TYPE(LVDS), + OUTPUT_TYPE(TVOUT), + OUTPUT_TYPE(HDMI), + OUTPUT_TYPE(DP), + OUTPUT_TYPE(EDP), + OUTPUT_TYPE(DSI), + OUTPUT_TYPE(UNKNOWN), + OUTPUT_TYPE(DP_MST), +}; + +#undef OUTPUT_TYPE + +static void snprintf_output_types(char *buf, size_t len, + unsigned int output_types) +{ + char *str = buf; + int i; + + str[0] = '\0'; + + for (i = 0; i < ARRAY_SIZE(output_type_str); i++) { + int r; + + if ((output_types & BIT(i)) == 0) + continue; + + r = snprintf(str, len, "%s%s", + str != buf ? 
"," : "", output_type_str[i]); + if (r >= len) + break; + str += r; + len -= r; + + output_types &= ~BIT(i); + } + + WARN_ON_ONCE(output_types != 0); +} + static void intel_dump_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, const char *context) @@ -10693,10 +10620,15 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, struct intel_plane *intel_plane; struct intel_plane_state *state; struct drm_framebuffer *fb; + char buf[64]; DRM_DEBUG_KMS("[CRTC:%d:%s]%s\n", crtc->base.base.id, crtc->base.name, context); + snprintf_output_types(buf, sizeof(buf), pipe_config->output_types); + DRM_DEBUG_KMS("output_types: %s (0x%x)\n", + buf, pipe_config->output_types); + DRM_DEBUG_KMS("cpu_transcoder: %s, pipe bpp: %i, dithering: %i\n", transcoder_name(pipe_config->cpu_transcoder), pipe_config->pipe_bpp, pipe_config->dither); @@ -11336,6 +11268,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2); + PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb4); + PIPE_CONF_CHECK_X(dpll_hw_state.pll0); + PIPE_CONF_CHECK_X(dpll_hw_state.pll1); + PIPE_CONF_CHECK_X(dpll_hw_state.pll2); + PIPE_CONF_CHECK_X(dpll_hw_state.pll3); + PIPE_CONF_CHECK_X(dpll_hw_state.pll6); + PIPE_CONF_CHECK_X(dpll_hw_state.pll8); + PIPE_CONF_CHECK_X(dpll_hw_state.pll9); + PIPE_CONF_CHECK_X(dpll_hw_state.pll10); + PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -12220,7 +12164,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) if (updated & cmask || !cstate->base.active) continue; - if (skl_ddb_allocation_overlaps(entries, &cstate->wm.skl.ddb, i)) + if (skl_ddb_allocation_overlaps(dev_priv, + entries, + &cstate->wm.skl.ddb, + i)) continue; updated |= cmask; @@ -12515,21 +12462,10 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; - ret = drm_atomic_helper_setup_commit(state, nonblock); - if (ret) - return ret; - drm_atomic_state_get(state); i915_sw_fence_init(&intel_state->commit_ready, intel_atomic_commit_ready); - ret = intel_atomic_prepare_commit(dev, state); - if (ret) { - DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); - i915_sw_fence_commit(&intel_state->commit_ready); - return ret; - } - /* * The intel_legacy_cursor_update() fast path takes care * of avoiding the vblank waits for simple cursor @@ -12538,19 +12474,37 @@ static int intel_atomic_commit(struct drm_device *dev, * updates happen during the correct frames. Gen9+ have * double buffered watermarks and so shouldn't need this. * - * Do this after drm_atomic_helper_setup_commit() and - * intel_atomic_prepare_commit() because we still want - * to skip the flip and fb cleanup waits. Although that - * does risk yanking the mapping from under the display - * engine. + * Unset state->legacy_cursor_update before the call to + * drm_atomic_helper_setup_commit() because otherwise + * drm_atomic_helper_wait_for_flip_done() is a noop and + * we get FIFO underruns because we didn't wait + * for vblank. * * FIXME doing watermarks and fb cleanup from a vblank worker * (assuming we had any) would solve these problems. 
*/ - if (INTEL_GEN(dev_priv) < 9) - state->legacy_cursor_update = false; + if (INTEL_GEN(dev_priv) < 9 && state->legacy_cursor_update) { + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(intel_state, crtc, new_crtc_state, i) + if (new_crtc_state->wm.need_postvbl_update || + new_crtc_state->update_wm_post) + state->legacy_cursor_update = false; + } + + ret = intel_atomic_prepare_commit(dev, state); + if (ret) { + DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); + i915_sw_fence_commit(&intel_state->commit_ready); + return ret; + } + + ret = drm_atomic_helper_setup_commit(state, nonblock); + if (!ret) + ret = drm_atomic_helper_swap_state(state, true); - ret = drm_atomic_helper_swap_state(state, true); if (ret) { i915_sw_fence_commit(&intel_state->commit_ready); @@ -13228,8 +13182,8 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; - primary->update_plane = skylake_update_primary_plane; - primary->disable_plane = skylake_disable_primary_plane; + primary->update_plane = skl_update_plane; + primary->disable_plane = skl_disable_plane; } else if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -13238,8 +13192,8 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) else modifiers = skl_format_modifiers_noccs; - primary->update_plane = skylake_update_primary_plane; - primary->disable_plane = skylake_disable_primary_plane; + primary->update_plane = skl_update_plane; + primary->disable_plane = skl_disable_plane; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); @@ -14397,8 +14351,6 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_update_cdclk(dev_priv); dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; - - intel_init_clock_gating(dev_priv); } /* @@ -14748,10 +14700,10 @@ static struct intel_connector *intel_encoder_find_connector(struct intel_encoder } static bool has_pch_trancoder(struct drm_i915_private *dev_priv, - enum transcoder pch_transcoder) + enum pipe pch_transcoder) { return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || - (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A); } static void intel_sanitize_crtc(struct intel_crtc *crtc, @@ -14834,7 +14786,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, * PCH transcoders B and C would prevent enabling the south * error interrupt (see cpt_can_enable_serr_int()). */ - if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) + if (has_pch_trancoder(dev_priv, crtc->pipe)) crtc->pch_fifo_underrun_disabled = true; } } @@ -15110,6 +15062,15 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct intel_encoder *encoder; int i; + if (IS_HASWELL(dev_priv)) { + /* + * WaRsPkgCStateDisplayPMReq:hsw + * System hang if this isn't done before disabling all planes! + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + } + intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. 
*/ @@ -15207,6 +15168,8 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); + intel_init_clock_gating(dev_priv); + intel_setup_overlay(dev_priv); } @@ -15233,6 +15196,23 @@ void intel_connector_unregister(struct drm_connector *connector) intel_panel_destroy_backlight(connector); } +static void intel_hpd_poll_fini(struct drm_device *dev) +{ + struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; + + /* First disable polling... */ + drm_kms_helper_poll_fini(dev); + + /* Then kill the work that may have been queued by hpd. */ + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { + if (connector->modeset_retry_work.func) + cancel_work_sync(&connector->modeset_retry_work); + } + drm_connector_list_iter_end(&conn_iter); +} + void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -15253,7 +15233,7 @@ void intel_modeset_cleanup(struct drm_device *dev) * Due to the hpd irq storm handling the hotplug work can re-arm the * poll handlers. Hence disable polling after hpd handling is shut down. */ - drm_kms_helper_poll_fini(dev); + intel_hpd_poll_fini(dev); /* poll work can call into fbdev, hence clean that up afterwards */ intel_fbdev_fini(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 90e756c76f10..158438bb0389 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -137,32 +137,20 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); -static int intel_dp_num_rates(u8 link_bw_code) -{ - switch (link_bw_code) { - default: - WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", - link_bw_code); - case DP_LINK_BW_1_62: - return 1; - case DP_LINK_BW_2_7: - return 2; - case DP_LINK_BW_5_4: - return 3; - } -} - /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { - int i, num_rates; + int i, max_rate; - num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]); + max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - for (i = 0; i < num_rates; i++) + for (i = 0; i < ARRAY_SIZE(default_rates); i++) { + if (default_rates[i] > max_rate) + break; intel_dp->sink_rates[i] = default_rates[i]; + } - intel_dp->num_sink_rates = num_rates; + intel_dp->num_sink_rates = i; } /* Theoretical max between source and sink */ @@ -254,15 +242,15 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) } else if (IS_GEN9_BC(dev_priv)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); - } else { + } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || + IS_BROADWELL(dev_priv)) { source_rates = default_rates; size = ARRAY_SIZE(default_rates); + } else { + source_rates = default_rates; + size = ARRAY_SIZE(default_rates) - 1; } - /* This depends on the fact that 5.4 is last value in the array */ - if (!intel_dp_source_supports_hbr2(intel_dp)) - size--; - intel_dp->source_rates = source_rates; intel_dp->num_source_rates = size; } @@ -1019,7 +1007,7 @@ static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, else precharge = 5; - if (IS_BROADWELL(dev_priv) && intel_dig_port->port == PORT_A) + if (IS_BROADWELL(dev_priv)) timeout = DP_AUX_CH_CTL_TIME_OUT_600us; else timeout = DP_AUX_CH_CTL_TIME_OUT_400us; @@ -1044,7 +1032,7 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_DONE | 
(has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) | DP_AUX_CH_CTL_TIME_OUT_ERROR | - DP_AUX_CH_CTL_TIME_OUT_1600us | + DP_AUX_CH_CTL_TIME_OUT_MAX | DP_AUX_CH_CTL_RECEIVE_ERROR | (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) | DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) | @@ -1482,14 +1470,9 @@ intel_dp_aux_init(struct intel_dp *intel_dp) bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + int max_rate = intel_dp->source_rates[intel_dp->num_source_rates - 1]; - if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || - IS_BROADWELL(dev_priv) || (INTEL_GEN(dev_priv) >= 9)) - return true; - else - return false; + return max_rate >= 540000; } static void @@ -1849,6 +1832,8 @@ found: if (!HAS_DDI(dev_priv)) intel_dp_set_clock(encoder, pipe_config); + intel_psr_compute_config(intel_dp, pipe_config); + return true; } @@ -2308,8 +2293,8 @@ static void edp_panel_off(struct intel_dp *intel_dp) I915_WRITE(pp_ctrl_reg, pp); POSTING_READ(pp_ctrl_reg); - intel_dp->panel_power_off_time = ktime_get_boottime(); wait_panel_off(intel_dp); + intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ intel_display_power_put(dev_priv, intel_dp->aux_power_domain); @@ -3170,9 +3155,7 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum port port = dp_to_dig_port(intel_dp)->port; - if (IS_GEN9_LP(dev_priv)) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - else if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -3752,9 +3735,16 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) } - /* Read the eDP Display control capabilities registers */ - if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && - drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, + /* + * Read the eDP display control registers. + * + * Do this independent of DP_DPCD_DISPLAY_CONTROL_CAPABLE bit in + * DP_EDP_CONFIGURATION_CAP, because some buggy displays do not have it + * set, but require eDP 1.4+ detection (e.g. for supported link rates + * method). The display control registers should read zero if they're + * not supported anyway. + */ + if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == sizeof(intel_dp->edp_dpcd)) DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), @@ -5286,7 +5276,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, * seems sufficient to avoid this problem. 
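/*
 * Editor's illustrative sketch, not part of the patch above: the
 * intel_dp_set_sink_rates() and intel_dp_source_supports_hbr2() hunks in
 * this intel_dp.c diff rely on the DP link-bw arithmetic sketched below.
 * drm_dp_bw_code_to_link_rate() returns the link clock in kHz as
 * link-bw code * 27000, so DP_LINK_BW_1_62 (0x06) -> 162000,
 * DP_LINK_BW_2_7 (0x0a) -> 270000 and DP_LINK_BW_5_4 (0x14) -> 540000,
 * where 540000 kHz link clock corresponds to HBR2 (5.4 Gbit/s per lane).
 * The table and helper names here are made up for the example; only the
 * arithmetic mirrors the patch.
 */
static const int example_default_rates[] = { 162000, 270000, 540000 };

static int example_num_sink_rates(u8 max_link_bw_code)
{
	int max_rate = max_link_bw_code * 27000; /* drm_dp_bw_code_to_link_rate() */
	int i;

	for (i = 0; i < ARRAY_SIZE(example_default_rates); i++)
		if (example_default_rates[i] > max_rate)
			break;

	return i; /* e.g. DP_LINK_BW_2_7 -> 2 sink rates: 162000, 270000 */
}

static bool example_source_supports_hbr2(int max_source_rate)
{
	/* HBR2 is the 540000 kHz entry, hence the ">= 540000" check above */
	return max_source_rate >= 540000;
}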
*/ if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) { - vbt.t11_t12 = max_t(u16, vbt.t11_t12, 900 * 10); + vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10); DRM_DEBUG_KMS("Increasing T12 panel delay as per the quirk to %d\n", vbt.t11_t12); } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 9a396f483f8b..772521440a9f 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -162,14 +162,19 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); + /* + * Power down mst path before disabling the port, otherwise we end + * up getting interrupts from the sink upon detecting link loss. + */ + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, + false); + intel_dp->active_mst_links--; intel_mst->connector = NULL; if (intel_dp->active_mst_links == 0) { intel_dig_port->base.post_disable(&intel_dig_port->base, NULL, NULL); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } @@ -196,6 +201,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); if (intel_dp->active_mst_links == 0) intel_dig_port->base.pre_enable(&intel_dig_port->base, pipe_config, NULL); @@ -448,32 +454,52 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo struct intel_dp *intel_dp = container_of(mgr, struct intel_dp, mst_mgr); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_connector *intel_connector; struct drm_connector *connector; - int i; + enum pipe pipe; + int ret; intel_connector = intel_connector_alloc(); if (!intel_connector) return NULL; connector = &intel_connector->base; - drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); + ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, + DRM_MODE_CONNECTOR_DisplayPort); + if (ret) { + intel_connector_free(intel_connector); + return NULL; + } + drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs); intel_connector->get_hw_state = intel_dp_mst_get_hw_state; intel_connector->mst_port = intel_dp; intel_connector->port = port; - for (i = PIPE_A; i <= PIPE_C; i++) { - drm_mode_connector_attach_encoder(&intel_connector->base, - &intel_dp->mst_encoders[i]->base.base); + for_each_pipe(dev_priv, pipe) { + struct drm_encoder *enc = + &intel_dp->mst_encoders[pipe]->base.base; + + ret = drm_mode_connector_attach_encoder(&intel_connector->base, + enc); + if (ret) + goto err; } drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); - drm_mode_connector_set_path_property(connector, pathprop); + ret = drm_mode_connector_set_path_property(connector, pathprop); + if (ret) + goto err; + return connector; + +err: + drm_connector_cleanup(connector); + return NULL; } static void intel_dp_register_mst_connector(struct drm_connector *connector) @@ -563,11 +589,12 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum static bool intel_dp_create_fake_mst_encoders(struct intel_digital_port *intel_dig_port) { - int i; 
struct intel_dp *intel_dp = &intel_dig_port->dp; + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum pipe pipe; - for (i = PIPE_A; i <= PIPE_C; i++) - intel_dp->mst_encoders[i] = intel_dp_create_fake_mst_encoder(intel_dig_port, i); + for_each_pipe(dev_priv, pipe) + intel_dp->mst_encoders[pipe] = intel_dp_create_fake_mst_encoder(intel_dig_port, pipe); return true; } diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 09b670929786..de38d014ed39 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -208,12 +208,6 @@ static const struct bxt_ddi_phy_info glk_ddi_phy_info[] = { }, }; -static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) -{ - return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) | - BIT(phy_info->channel[DPIO_CH0].port); -} - static const struct bxt_ddi_phy_info * bxt_get_phy_list(struct drm_i915_private *dev_priv, int *count) { @@ -313,7 +307,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - enum port port; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -335,19 +328,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, return false; } - for_each_port_masked(port, bxt_phy_port_mask(phy_info)) { - u32 tmp = I915_READ(BXT_PHY_CTL(port)); - - if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " - "for port %c powered down " - "(PHY_CTL %08x)\n", - phy, port_name(port), tmp); - - return false; - } - } - return true; } diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index a2a3d93d67bd..df808a94c511 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1996,7 +1996,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, /* 3. Configure DPLL_CFGCR0 */ /* Avoid touch CFGCR1 if HDMI mode is not enabled */ - if (pll->state.hw_state.cfgcr0 & DPLL_CTRL1_HDMI_MODE(pll->id)) { + if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) { val = pll->state.hw_state.cfgcr1; I915_WRITE(CNL_DPLL_CFGCR1(pll->id), val); /* 4. Reab back to ensure writes completed */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0cab667fff57..7bc60c848940 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -499,7 +499,6 @@ struct intel_crtc_scaler_state { struct intel_pipe_wm { struct intel_wm_level wm[5]; - struct intel_wm_level raw_wm[5]; uint32_t linetime; bool fbc_wm_enabled; bool pipe_enabled; @@ -718,6 +717,9 @@ struct intel_crtc_state { struct intel_link_m_n dp_m2_n2; bool has_drrs; + bool has_psr; + bool has_psr2; + /* * Frequence the dpll for the port should run at. Differs from the * adjusted dotclock e.g. for DP or 12bpc hdmi mode. This is also @@ -800,18 +802,10 @@ struct intel_crtc { * some outputs connected to this crtc. */ bool active; - bool lowfreq_avail; u8 plane_ids_mask; unsigned long long enabled_power_domains; struct intel_overlay *overlay; - /* Display surface base address adjustement for pageflips. Note that on - * gen4+ this only adjusts up to a tile, offsets within a tile are - * handled in the hw itself (with the TILEOFF register). 
*/ - u32 dspaddr_offset; - int adjusted_x; - int adjusted_y; - struct intel_crtc_state *config; /* global reset count when the last flip was submitted */ @@ -1066,7 +1060,7 @@ struct intel_digital_port { void (*write_infoframe)(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len); void (*set_infoframes)(struct drm_encoder *encoder, bool enable, @@ -1243,7 +1237,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask) { - return mask & ~i915->rps.pm_intrmsk_mbz; + return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; } void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); @@ -1254,7 +1248,7 @@ static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) * We only use drm_irq_uninstall() at unload and VT switch, so * this is the only thing we need to check. */ - return dev_priv->pm.irqs_enabled; + return dev_priv->runtime_pm.irqs_enabled; } int intel_get_crtc_scanline(struct intel_crtc *crtc); @@ -1360,11 +1354,13 @@ void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); void intel_encoder_destroy(struct drm_encoder *encoder); int intel_connector_init(struct intel_connector *); struct intel_connector *intel_connector_alloc(void); +void intel_connector_free(struct intel_connector *connector); bool intel_connector_get_hw_state(struct intel_connector *connector); void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder); -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc); +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder); + enum pipe intel_get_pipe_from_connector(struct intel_connector *connector); int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -1763,6 +1759,8 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, void intel_psr_init(struct drm_i915_private *dev_priv); void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); +void intel_psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); @@ -1790,7 +1788,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) { - WARN_ONCE(dev_priv->pm.suspended, + WARN_ONCE(dev_priv->runtime_pm.suspended, "Device suspended during HW access\n"); } @@ -1798,7 +1796,7 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count), "RPM wakelock ref not held during HW access"); } @@ -1823,7 +1821,7 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) static inline void disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -1840,7 +1838,7 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) static inline void enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); } void 
intel_runtime_pm_get(struct drm_i915_private *dev_priv); @@ -1893,7 +1891,8 @@ int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2); -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); bool ilk_disable_lp_wm(struct drm_device *dev); @@ -1902,7 +1901,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct intel_crtc_state *cstate); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); -static inline int intel_enable_rc6(void) +static inline int intel_rc6_enabled(void) { return i915_modparams.enable_rc6; } @@ -1921,6 +1920,10 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state); void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state); +void skl_update_plane(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); +void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 20a7b004ffd7..83f15848098a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -790,14 +790,19 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; DRM_DEBUG_KMS("\n"); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + /* * The BIOS may leave the PLL in a wonky state where it doesn't * lock. It needs to be fully powered down to fix it. 
@@ -1746,42 +1751,13 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) else intel_encoder->crtc_mask = BIT(PIPE_B); - if (dev_priv->vbt.dsi.config->dual_link) { + if (dev_priv->vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); - - switch (dev_priv->vbt.dsi.config->dl_dcs_backlight_ports) { - case DL_DCS_PORT_A: - intel_dsi->dcs_backlight_ports = BIT(PORT_A); - break; - case DL_DCS_PORT_C: - intel_dsi->dcs_backlight_ports = BIT(PORT_C); - break; - default: - case DL_DCS_PORT_A_AND_C: - intel_dsi->dcs_backlight_ports = BIT(PORT_A) | BIT(PORT_C); - break; - } - - switch (dev_priv->vbt.dsi.config->dl_dcs_cabc_ports) { - case DL_DCS_PORT_A: - intel_dsi->dcs_cabc_ports = BIT(PORT_A); - break; - case DL_DCS_PORT_C: - intel_dsi->dcs_cabc_ports = BIT(PORT_C); - break; - default: - case DL_DCS_PORT_A_AND_C: - intel_dsi->dcs_cabc_ports = BIT(PORT_A) | BIT(PORT_C); - break; - } - } else { + else intel_dsi->ports = BIT(port); - intel_dsi->dcs_backlight_ports = BIT(port); - intel_dsi->dcs_cabc_ports = BIT(port); - } - if (!dev_priv->vbt.dsi.config->cabc_supported) - intel_dsi->dcs_cabc_ports = 0; + intel_dsi->dcs_backlight_ports = dev_priv->vbt.dsi.bl_ports; + intel_dsi->dcs_cabc_ports = dev_priv->vbt.dsi.cabc_ports; /* Create a DSI host (and a device) for each port. */ for_each_dsi_port(port, intel_dsi->ports) { diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 5c562e1f56ae..53c9b763f4ce 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -379,32 +379,15 @@ static const struct drm_encoder_funcs intel_dvo_enc_funcs = { * chip being on DVOB/C and having multiple pipes. */ static struct drm_display_mode * -intel_dvo_get_current_mode(struct drm_connector *connector) +intel_dvo_get_current_mode(struct intel_encoder *encoder) { - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); - uint32_t dvo_val = I915_READ(intel_dvo->dev.dvo_reg); - struct drm_display_mode *mode = NULL; + struct drm_display_mode *mode; - /* If the DVO port is active, that'll be the LVDS, so we can pull out - * its timings to get how the BIOS set up the panel. - */ - if (dvo_val & DVO_ENABLE) { - struct intel_crtc *crtc; - int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0; - - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - if (crtc) { - mode = intel_crtc_mode_get(dev, &crtc->base); - if (mode) { - mode->type |= DRM_MODE_TYPE_PREFERRED; - if (dvo_val & DVO_HSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PHSYNC; - if (dvo_val & DVO_VSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PVSYNC; - } - } + mode = intel_encoder_current_mode(encoder); + if (mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(mode); + mode->type |= DRM_MODE_TYPE_PREFERRED; } return mode; @@ -551,7 +534,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) * mode being output through DVO. 
*/ intel_panel_init(&intel_connector->panel, - intel_dvo_get_current_mode(connector), + intel_dvo_get_current_mode(intel_encoder), NULL, NULL); intel_dvo->panel_wants_dither = true; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a28e2a864cf1..ab5bf4e2e28e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -22,7 +22,10 @@ * */ +#include <drm/drm_print.h> + #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -39,7 +42,7 @@ #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * PAGE_SIZE) +#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) @@ -384,10 +387,6 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915)) - return true; - /* * IOMMU adds unpredictable latency causing the CSB write (from the * GPU into the HWSP) to only be visible some time after the interrupt @@ -396,6 +395,10 @@ static bool csb_force_mmio(struct drm_i915_private *i915) if (intel_vtd_active()) return true; + /* Older GVT emulation depends upon intercepting CSB mmio */ + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + return false; } @@ -613,9 +616,22 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (IS_ERR(ring)) return PTR_ERR(ring); + /* + * Similarly the preempt context must always be available so that + * we can interrupt the engine at any time. + */ + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) { + ring = engine->context_pin(engine, + engine->i915->preempt_context); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + goto err_unpin_kernel; + } + } + ret = intel_engine_init_breadcrumbs(engine); if (ret) - goto err_unpin; + goto err_unpin_preempt; ret = i915_gem_render_state_init(engine); if (ret) @@ -634,7 +650,10 @@ err_rs_fini: i915_gem_render_state_fini(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); -err_unpin: +err_unpin_preempt: + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + engine->context_unpin(engine, engine->i915->preempt_context); +err_unpin_kernel: engine->context_unpin(engine, engine->i915->kernel_context); return ret; } @@ -660,6 +679,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); } @@ -830,11 +851,6 @@ static int wa_add(struct drm_i915_private *dev_priv, #define WA_SET_FIELD_MASKED(addr, mask, value) \ WA_REG(addr, mask, _MASKED_FIELD(mask, value)) -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, i915_reg_t reg) { @@ -845,8 +861,8 @@ static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) return -EINVAL; - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - 
i915_mmio_reg_offset(reg)); + I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); wa->hw_whitelist_count[engine->id]++; return 0; @@ -980,7 +996,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) GEN9_PBE_COMPRESSED_HASH_SELECTION); WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); - WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN); + + I915_WRITE(MMCD_MISC_CTRL, + I915_READ(MMCD_MISC_CTRL) | + MMCD_PCLA | + MMCD_HOTSPOT_EN); } /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ @@ -1071,13 +1091,33 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); + /* + * Supporting preemption with fine-granularity requires changes in the + * batch buffer programming. Since we can't break old userspace, we + * need to set our default preemption level to safe value. Userspace is + * still able to use more fine-grained preemption levels, since in + * WaEnablePreemptionGranularityControlByUMD we're whitelisting the + * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are + * not real HW workarounds, but merely a way to start using preemption + * while maintaining old contract with userspace. + */ + + /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) return ret; - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; @@ -1139,14 +1179,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - /* WaEnableGapsTsvCreditFix:skl */ I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE)); @@ -1221,9 +1253,12 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) } /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + u32 val = I915_READ(GEN8_L3SQCREG1); + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); + } /* WaToEnableHwFixForPushConstHWBug:bxt */ if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) @@ -1278,7 +1313,16 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) /* FtrEnableFastAnisoL1BankingFix: cnl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); + /* WaDisable3DMidCmdPreemption:cnl */ + 
WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:cnl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; @@ -1504,8 +1548,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) return false; - /* Both ports drained, no more ELSP submission? */ - if (port_request(&engine->execlists.port[0])) + /* Waiting to drain ELSP? */ + if (READ_ONCE(engine->execlists.active)) return false; /* ELSP is empty, but there are ready requests? */ @@ -1578,6 +1622,168 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } +static void print_request(struct drm_printer *m, + struct drm_i915_gem_request *rq, + const char *prefix) +{ + drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, + rq->global_seqno, + i915_gem_request_completed(rq) ? "!" : "", + rq->ctx->hw_id, rq->fence.seqno, + rq->priotree.priority, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + rq->timeline->common->name); +} + +void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) +{ + struct intel_breadcrumbs * const b = &engine->breadcrumbs; + const struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_gpu_error * const error = &engine->i915->gpu_error; + struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_gem_request *rq; + struct rb_node *rb; + u64 addr; + + drm_printf(m, "%s\n", engine->name); + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine), + engine->hangcheck.seqno, + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); + drm_printf(m, "\tReset count: %d\n", + i915_reset_engine_count(error, engine)); + + rcu_read_lock(); + + drm_printf(m, "\tRequests:\n"); + + rq = list_first_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tfirst "); + + rq = list_last_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tlast "); + + rq = i915_gem_find_active_request(engine); + if (rq) { + print_request(m, rq, "\t\tactive "); + drm_printf(m, + "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + } + + drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", + I915_READ(RING_START(engine->mmio_base)), + rq ? i915_ggtt_offset(rq->ring->vma) : 0); + drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", + I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, + rq ? rq->ring->head : 0); + drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", + I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, + rq ? rq->ring->tail : 0); + drm_printf(m, "\tRING_CTL: 0x%08x [%s]\n", + I915_READ(RING_CTL(engine->mmio_base)), + I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? 
"waiting" : ""); + + rcu_read_unlock(); + + addr = intel_engine_get_active_head(engine); + drm_printf(m, "\tACTHD: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + addr = intel_engine_get_last_batch_head(engine); + drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + + if (i915_modparams.enable_execlists) { + const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + u32 ptr, read, write; + unsigned int idx; + + drm_printf(m, "\tExeclist status: 0x%08x %08x\n", + I915_READ(RING_EXECLIST_STATUS_LO(engine)), + I915_READ(RING_EXECLIST_STATUS_HI(engine))); + + ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); + read = GEN8_CSB_READ_PTR(ptr); + write = GEN8_CSB_WRITE_PTR(ptr); + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", + read, execlists->csb_head, + write, + intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), + yesno(test_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted))); + if (read >= GEN8_CSB_ENTRIES) + read = 0; + if (write >= GEN8_CSB_ENTRIES) + write = 0; + if (read > write) + write += GEN8_CSB_ENTRIES; + while (read < write) { + idx = ++read % GEN8_CSB_ENTRIES; + drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", + idx, + I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), + hws[idx * 2], + I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), + hws[idx * 2 + 1]); + } + + rcu_read_lock(); + for (idx = 0; idx < execlists_num_ports(execlists); idx++) { + unsigned int count; + + rq = port_unpack(&execlists->port[idx], &count); + if (rq) { + drm_printf(m, "\t\tELSP[%d] count=%d, ", + idx, count); + print_request(m, rq, "rq: "); + } else { + drm_printf(m, "\t\tELSP[%d] idle\n", + idx); + } + } + drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); + rcu_read_unlock(); + } else if (INTEL_GEN(dev_priv) > 6) { + drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE(engine))); + drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE_READ(engine))); + drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", + I915_READ(RING_PP_DIR_DCLV(engine))); + } + + spin_lock_irq(&engine->timeline->lock); + list_for_each_entry(rq, &engine->timeline->requests, link) + print_request(m, rq, "\t\tE "); + for (rb = execlists->first; rb; rb = rb_next(rb)) { + struct i915_priolist *p = + rb_entry(rb, typeof(*p), node); + + list_for_each_entry(rq, &p->requests, priotree.link) + print_request(m, rq, "\t\tQ "); + } + spin_unlock_irq(&engine->timeline->lock); + + spin_lock_irq(&b->rb_lock); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = rb_entry(rb, typeof(*w), node); + + drm_printf(m, "\t%s [%d] waiting for %x\n", + w->tsk->comm, w->tsk->pid, w->seqno); + } + spin_unlock_irq(&b->rb_lock); + + drm_printf(m, "\n"); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 8e3a05505f49..1a0f5e0c8d10 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -69,9 +69,9 @@ static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) * address we program because it starts at the real start of the buffer, so we * have to take this into consideration here. 
*/ -static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc) +static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc) { - return crtc->base.y - crtc->adjusted_y; + return fbc->state_cache.plane.y - fbc->state_cache.plane.adjusted_y; } /* @@ -727,8 +727,8 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) intel_fbc_get_plane_source_size(&fbc->state_cache, &effective_w, &effective_h); - effective_w += crtc->adjusted_x; - effective_h += crtc->adjusted_y; + effective_w += fbc->state_cache.plane.adjusted_x; + effective_h += fbc->state_cache.plane.adjusted_y; return effective_w <= max_w && effective_h <= max_h; } @@ -757,6 +757,9 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; cache->plane.visible = plane_state->base.visible; + cache->plane.adjusted_x = plane_state->main.x; + cache->plane.adjusted_y = plane_state->main.y; + cache->plane.y = plane_state->base.src.y1 >> 16; if (!cache->plane.visible) return; @@ -888,7 +891,7 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, params->crtc.pipe = crtc->pipe; params->crtc.plane = crtc->plane; - params->crtc.fence_y_offset = get_crtc_fence_y_offset(crtc); + params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc); params->fb.format = cache->fb.format; params->fb.stride = cache->fb.stride; diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index f2bb8116227c..b8af35187d22 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -189,7 +189,7 @@ static int intelfb_create(struct drm_fb_helper *helper, " releasing it\n", intel_fb->base.width, intel_fb->base.height, sizes->fb_width, sizes->fb_height); - drm_framebuffer_unreference(&intel_fb->base); + drm_framebuffer_put(&intel_fb->base); intel_fb = ifbdev->fb = NULL; } if (!intel_fb || WARN_ON(!intel_fb->obj)) { @@ -627,7 +627,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, ifbdev->preferred_bpp = fb->base.format->cpp[0] * 8; ifbdev->fb = fb; - drm_framebuffer_reference(&ifbdev->fb->base); + drm_framebuffer_get(&ifbdev->fb->base); /* Final pass to check if any active pipes don't have fbs */ for_each_crtc(dev, crtc) { diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c new file mode 100644 index 000000000000..10037c0fdf95 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -0,0 +1,369 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "intel_guc.h" +#include "i915_drv.h" + +static void gen8_guc_raise_irq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); +} + +static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) +{ + GEM_BUG_ON(!guc->send_regs.base); + GEM_BUG_ON(!guc->send_regs.count); + GEM_BUG_ON(i >= guc->send_regs.count); + + return _MMIO(guc->send_regs.base + 4 * i); +} + +void intel_guc_init_send_regs(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + enum forcewake_domains fw_domains = 0; + unsigned int i; + + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); + guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; + + for (i = 0; i < guc->send_regs.count; i++) { + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + guc_send_reg(guc, i), + FW_REG_READ | FW_REG_WRITE); + } + guc->send_regs.fw_domains = fw_domains; +} + +void intel_guc_init_early(struct intel_guc *guc) +{ + intel_guc_ct_init_early(&guc->ct); + + mutex_init(&guc->send_mutex); + guc->send = intel_guc_send_nop; + guc->notify = gen8_guc_raise_irq; +} + +static u32 get_gt_type(struct drm_i915_private *dev_priv) +{ + /* XXX: GT type based on PCI device ID? field seems unused by fw */ + return 0; +} + +static u32 get_core_family(struct drm_i915_private *dev_priv) +{ + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { + case 9: + return GUC_CORE_FAMILY_GEN9; + + default: + MISSING_CASE(gen); + return GUC_CORE_FAMILY_UNKNOWN; + } +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. + */ +void intel_guc_init_params(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 params[GUC_CTL_MAX_DWORDS]; + int i; + + memset(params, 0, sizeof(params)); + + params[GUC_CTL_DEVICE_INFO] |= + (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | + (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); + + /* + * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one + * second. 
This ARAR is calculated by: + * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10 + */ + params[GUC_CTL_ARAT_HIGH] = 0; + params[GUC_CTL_ARAT_LOW] = 100000000; + + params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER; + + params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER | + GUC_CTL_VCS2_ENABLED; + + params[GUC_CTL_LOG_PARAMS] = guc->log.flags; + + if (i915_modparams.guc_log_level >= 0) { + params[GUC_CTL_DEBUG] = + i915_modparams.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; + } else { + params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; + } + + /* If GuC submission is enabled, set up additional parameters here */ + if (i915_modparams.enable_guc_submission) { + u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; + u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); + u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; + + params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; + params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; + + pgs >>= PAGE_SHIFT; + params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | + (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); + + params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; + + /* Unmask this bit to enable the GuC's internal scheduler */ + params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; + } + + /* + * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and + * they are power context saved so it's ok to release forcewake + * when we are done here and take it again at xfer time. + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); + + I915_WRITE(SOFT_SCRATCH(0), 0); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + I915_WRITE(SOFT_SCRATCH(1 + i), params[i]); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); +} + +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) +{ + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; +} + +/* + * This function implements the MMIO based host to GuC interface. + */ +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 status; + int i; + int ret; + + GEM_BUG_ON(!len); + GEM_BUG_ON(len > guc->send_regs.count); + + /* If CT is available, we expect to use MMIO only during init/fini */ + GEM_BUG_ON(HAS_GUC_CT(dev_priv) && + *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && + *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + + mutex_lock(&guc->send_mutex); + intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); + + for (i = 0; i < len; i++) + I915_WRITE(guc_send_reg(guc, i), action[i]); + + POSTING_READ(guc_send_reg(guc, i - 1)); + + intel_guc_notify(guc); + + /* + * No GuC command should ever take longer than 10ms. + * Fast commands should still complete in 10us. 
+ */ + ret = __intel_wait_for_register_fw(dev_priv, + guc_send_reg(guc, 0), + INTEL_GUC_RECV_MASK, + INTEL_GUC_RECV_MASK, + 10, 10, &status); + if (status != INTEL_GUC_STATUS_SUCCESS) { + /* + * Either the GuC explicitly returned an error (which + * we convert to -EIO here) or no response at all was + * received within the timeout limit (-ETIMEDOUT) + */ + if (ret != -ETIMEDOUT) + ret = -EIO; + + DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" + " ret=%d status=0x%08X response=0x%08X\n", + action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); + } + + intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); + mutex_unlock(&guc->send_mutex); + + return ret; +} + +int intel_guc_sample_forcewake(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 action[2]; + + action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; + /* WaRsDisableCoarsePowerGating:skl,bxt */ + if (!intel_rc6_enabled() || + NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) + action[1] = 0; + else + /* bit 0 and 1 are for Render and Media domain separately */ + action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode + * @guc: intel_guc structure + * @rsa_offset: rsa offset w.r.t ggtt base of huc vma + * + * Triggers a HuC firmware authentication request to the GuC via intel_guc_send + * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by + * intel_huc_auth(). + * + * Return: non-zero code on error + */ +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_AUTHENTICATE_HUC, + rsa_offset + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_suspend() - notify GuC entering suspend state + * @dev_priv: i915 device private + */ +int intel_guc_suspend(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct i915_gem_context *ctx; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + gen9_disable_guc_interrupts(dev_priv); + + ctx = dev_priv->kernel_context; + + data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; + /* any value greater than GUC_POWER_D0 */ + data[1] = GUC_POWER_D1; + /* first page is shared data with GuC */ + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + + LRC_GUCSHR_PN * PAGE_SIZE; + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_resume() - notify GuC resuming from suspend state + * @dev_priv: i915 device private + */ +int intel_guc_resume(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct i915_gem_context *ctx; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + if (i915_modparams.guc_log_level >= 0) + gen9_enable_guc_interrupts(dev_priv); + + ctx = dev_priv->kernel_context; + + data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; + data[1] = GUC_POWER_D0; + /* first page is shared data with GuC */ + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + + LRC_GUCSHR_PN * PAGE_SIZE; + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) + * + * This is a wrapper to create an object for use with the GuC. 
In order to + * use it inside the GuC, an object needs to be pinned lifetime, so we allocate + * both some backing storage and a range inside the Global GTT. We must pin + * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that + * range is reserved inside GuC. + * + * Return: A i915_vma if successful, otherwise an ERR_PTR. + */ +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + obj = i915_gem_object_create(dev_priv, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; + + ret = i915_vma_pin(vma, 0, PAGE_SIZE, + PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (ret) { + vma = ERR_PTR(ret); + goto err; + } + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) +{ + u32 wopcm_size = GUC_WOPCM_TOP; + + /* On BXT, the top of WOPCM is reserved for RC6 context */ + if (IS_GEN9_LP(dev_priv)) + wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; + + return wopcm_size; +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h new file mode 100644 index 000000000000..418450b1ae27 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -0,0 +1,120 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_H_ +#define _INTEL_GUC_H_ + +#include "intel_uncore.h" +#include "intel_guc_fw.h" +#include "intel_guc_fwif.h" +#include "intel_guc_ct.h" +#include "intel_guc_log.h" +#include "intel_uc_fw.h" +#include "i915_guc_reg.h" +#include "i915_vma.h" + +/* + * Top level structure of GuC. It handles firmware loading and manages client + * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy + * ExecList submission. 
+ */ +struct intel_guc { + struct intel_uc_fw fw; + struct intel_guc_log log; + struct intel_guc_ct ct; + + /* Log snapshot if GuC errors during load */ + struct drm_i915_gem_object *load_err_log; + + /* intel_guc_recv interrupt related state */ + bool interrupts_enabled; + + struct i915_vma *ads_vma; + struct i915_vma *stage_desc_pool; + void *stage_desc_pool_vaddr; + struct ida stage_ids; + + struct i915_guc_client *execbuf_client; + + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); + /* Cyclic counter mod pagesize */ + u32 db_cacheline; + + /* GuC's FW specific registers used in MMIO send */ + struct { + u32 base; + unsigned int count; + enum forcewake_domains fw_domains; + } send_regs; + + /* To serialize the intel_guc_send actions */ + struct mutex send_mutex; + + /* GuC's FW specific send function */ + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + + /* GuC's FW specific notify function */ + void (*notify)(struct intel_guc *guc); +}; + +static +inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +{ + return guc->send(guc, action, len); +} + +static inline void intel_guc_notify(struct intel_guc *guc) +{ + guc->notify(guc); +} + +/* + * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), + * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is + * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects + * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. + */ +static inline u32 guc_ggtt_offset(struct i915_vma *vma) +{ + u32 offset = i915_ggtt_offset(vma); + + GEM_BUG_ON(offset < GUC_WOPCM_TOP); + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); + + return offset; +} + +void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_send_regs(struct intel_guc *guc); +void intel_guc_init_params(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); +int intel_guc_suspend(struct drm_i915_private *dev_priv); +int intel_guc_resume(struct drm_i915_private *dev_priv); +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_fw.c index c9e25be4db40..ef67a36354c5 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -26,31 +26,9 @@ * Dave Gordon <david.s.gordon@intel.com> * Alex Dai <yu.dai@intel.com> */ -#include "i915_drv.h" -#include "intel_uc.h" -/** - * DOC: GuC-specific firmware loader - * - * intel_guc: - * Top level structure of guc. It handles firmware loading and manages client - * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy - * ExecList submission. - * - * Firmware versioning: - * The firmware build process will generate a version header file with major and - * minor version defined. The versions are built into CSS header of firmware. - * i915 kernel driver set the minimal firmware version required per platform. - * The firmware installation package will install (symbolic link) proper version - * of firmware. 
- * - * GuC address space: - * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is - * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. - * - */ +#include "intel_guc_fw.h" +#include "i915_drv.h" #define SKL_FW_MAJOR 6 #define SKL_FW_MINOR 1 @@ -78,88 +56,45 @@ MODULE_FIRMWARE(I915_KBL_GUC_UCODE); #define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) - -static u32 get_gttype(struct drm_i915_private *dev_priv) -{ - /* XXX: GT type based on PCI device ID? field seems unused by fw */ - return 0; -} - -static u32 get_core_family(struct drm_i915_private *dev_priv) -{ - u32 gen = INTEL_GEN(dev_priv); - - switch (gen) { - case 9: - return GUC_CORE_FAMILY_GEN9; - - default: - MISSING_CASE(gen); - return GUC_CORE_FAMILY_UNKNOWN; - } -} - -/* - * Initialise the GuC parameter block before starting the firmware - * transfer. These parameters are read by the firmware on startup - * and cannot be changed thereafter. +/** + * intel_guc_fw_select() - selects GuC firmware for uploading + * + * @guc: intel_guc struct + * + * Return: zero when we know firmware, non-zero in other case */ -static void guc_params_init(struct drm_i915_private *dev_priv) +int intel_guc_fw_select(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - u32 params[GUC_CTL_MAX_DWORDS]; - int i; - - memset(&params, 0, sizeof(params)); - - params[GUC_CTL_DEVICE_INFO] |= - (get_gttype(dev_priv) << GUC_CTL_GTTYPE_SHIFT) | - (get_core_family(dev_priv) << GUC_CTL_COREFAMILY_SHIFT); - - /* - * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one - * second. This ARAR is calculated by: - * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10 - */ - params[GUC_CTL_ARAT_HIGH] = 0; - params[GUC_CTL_ARAT_LOW] = 100000000; - - params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER; - - params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER | - GUC_CTL_VCS2_ENABLED; - - params[GUC_CTL_LOG_PARAMS] = guc->log.flags; - - if (i915_modparams.guc_log_level >= 0) { - params[GUC_CTL_DEBUG] = - i915_modparams.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; - } else - params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; - - /* If GuC submission is enabled, set up additional parameters here */ - if (i915_modparams.enable_guc_submission) { - u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; - u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); - u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; - - params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; - params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; - - pgs >>= PAGE_SHIFT; - params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | - (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); + struct drm_i915_private *dev_priv = guc_to_i915(guc); - params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; + intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); - /* Unmask this bit to enable the GuC's internal scheduler */ - params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; + if (i915_modparams.guc_firmware_path) { + guc->fw.path = i915_modparams.guc_firmware_path; + guc->fw.major_ver_wanted = 0; + guc->fw.minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + guc->fw.path = I915_SKL_GUC_UCODE; + guc->fw.major_ver_wanted = SKL_FW_MAJOR; + guc->fw.minor_ver_wanted = SKL_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + guc->fw.path = I915_BXT_GUC_UCODE; + guc->fw.major_ver_wanted = BXT_FW_MAJOR; 
+ guc->fw.minor_ver_wanted = BXT_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + guc->fw.path = I915_KBL_GUC_UCODE; + guc->fw.major_ver_wanted = KBL_FW_MAJOR; + guc->fw.minor_ver_wanted = KBL_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + guc->fw.path = I915_GLK_GUC_UCODE; + guc->fw.major_ver_wanted = GLK_FW_MAJOR; + guc->fw.minor_ver_wanted = GLK_FW_MINOR; + } else { + DRM_ERROR("No GuC firmware known for platform with GuC!\n"); + return -ENOENT; } - I915_WRITE(SOFT_SCRATCH(0), 0); - - for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - I915_WRITE(SOFT_SCRATCH(1 + i), params[i]); + return 0; } /* @@ -250,38 +185,16 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, return ret; } -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) -{ - u32 wopcm_size = GUC_WOPCM_TOP; - - /* On BXT, the top of WOPCM is reserved for RC6 context */ - if (IS_GEN9_LP(dev_priv)) - wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; - - return wopcm_size; -} - /* * Load the GuC firmware blob into the MinuteIA. */ -static int guc_ucode_xfer(struct drm_i915_private *dev_priv) +static int guc_ucode_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) { - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - struct i915_vma *vma; + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); + struct drm_i915_private *dev_priv = guc_to_i915(guc); int ret; - ret = i915_gem_object_set_to_gtt_domain(guc_fw->obj, false); - if (ret) { - DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); - return ret; - } - - vma = i915_gem_object_ggtt_pin(guc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (IS_ERR(vma)) { - DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); - return PTR_ERR(vma); - } + GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -312,23 +225,15 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); } - guc_params_init(dev_priv); - ret = guc_ucode_xfer_dma(dev_priv, vma); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - return ret; } /** - * intel_guc_init_hw() - finish preparing the GuC for activity + * intel_guc_fw_upload() - finish preparing the GuC for activity * @guc: intel_guc structure * * Called during driver loading and also after a GPU reset. @@ -340,81 +245,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) * * Return: non-zero code on error */ -int intel_guc_init_hw(struct intel_guc *guc) +int intel_guc_fw_upload(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - const char *fw_path = guc->fw.path; - int ret; - - DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n", - fw_path, - intel_uc_fw_status_repr(guc->fw.fetch_status), - intel_uc_fw_status_repr(guc->fw.load_status)); - - if (guc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return -EIO; - - guc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc->fw.fetch_status), - intel_uc_fw_status_repr(guc->fw.load_status)); - - ret = guc_ucode_xfer(dev_priv); - - if (ret) - return -EAGAIN; - - guc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; - - DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", - i915_modparams.enable_guc_submission ? 
"submission enabled" : - "loaded", - guc->fw.path, - guc->fw.major_ver_found, guc->fw.minor_ver_found); - - return 0; -} - -/** - * intel_guc_select_fw() - selects GuC firmware for loading - * @guc: intel_guc struct - * - * Return: zero when we know firmware, non-zero in other case - */ -int intel_guc_select_fw(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - guc->fw.path = NULL; - guc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.type = INTEL_UC_FW_TYPE_GUC; - - if (i915_modparams.guc_firmware_path) { - guc->fw.path = i915_modparams.guc_firmware_path; - guc->fw.major_ver_wanted = 0; - guc->fw.minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - guc->fw.path = I915_SKL_GUC_UCODE; - guc->fw.major_ver_wanted = SKL_FW_MAJOR; - guc->fw.minor_ver_wanted = SKL_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - guc->fw.path = I915_BXT_GUC_UCODE; - guc->fw.major_ver_wanted = BXT_FW_MAJOR; - guc->fw.minor_ver_wanted = BXT_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - guc->fw.path = I915_KBL_GUC_UCODE; - guc->fw.major_ver_wanted = KBL_FW_MAJOR; - guc->fw.minor_ver_wanted = KBL_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - guc->fw.path = I915_GLK_GUC_UCODE; - guc->fw.major_ver_wanted = GLK_FW_MAJOR; - guc->fw.minor_ver_wanted = GLK_FW_MINOR; - } else { - DRM_ERROR("No GuC firmware known for platform with GuC!\n"); - return -ENOENT; - } - - return 0; + return intel_uc_fw_upload(&guc->fw, guc_ucode_xfer); } diff --git a/drivers/gpu/drm/i915/intel_guc_fw.h b/drivers/gpu/drm/i915/intel_guc_fw.h new file mode 100644 index 000000000000..023f5baa9dd6 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_fw.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_GUC_FW_H_ +#define _INTEL_GUC_FW_H_ + +struct intel_guc; + +int intel_guc_fw_select(struct intel_guc *guc); +int intel_guc_fw_upload(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 7eb6b4fa1d6f..80c507435458 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -82,8 +82,8 @@ #define GUC_CTL_ARAT_LOW 2 #define GUC_CTL_DEVICE_INFO 3 -#define GUC_CTL_GTTYPE_SHIFT 0 -#define GUC_CTL_COREFAMILY_SHIFT 7 +#define GUC_CTL_GT_TYPE_SHIFT 0 +#define GUC_CTL_CORE_FAMILY_SHIFT 7 #define GUC_CTL_LOG_PARAMS 4 #define GUC_LOG_VALID (1 << 0) @@ -178,49 +178,49 @@ */ struct uc_css_header { - uint32_t module_type; + u32 module_type; /* header_size includes all non-uCode bits, including css_header, rsa * key, modulus key and exponent data. */ - uint32_t header_size_dw; - uint32_t header_version; - uint32_t module_id; - uint32_t module_vendor; + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; union { struct { - uint8_t day; - uint8_t month; - uint16_t year; + u8 day; + u8 month; + u16 year; }; - uint32_t date; + u32 date; }; - uint32_t size_dw; /* uCode plus header_size_dw */ - uint32_t key_size_dw; - uint32_t modulus_size_dw; - uint32_t exponent_size_dw; + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; union { struct { - uint8_t hour; - uint8_t min; - uint16_t sec; + u8 hour; + u8 min; + u16 sec; }; - uint32_t time; + u32 time; }; char username[8]; char buildnumber[12]; union { struct { - uint32_t branch_client_version; - uint32_t sw_version; + u32 branch_client_version; + u32 sw_version; } guc; struct { - uint32_t sw_version; - uint32_t reserved; + u32 sw_version; + u32 reserved; } huc; }; - uint32_t prod_preprod_fw; - uint32_t reserved[12]; - uint32_t header_info; + u32 prod_preprod_fw; + u32 reserved[12]; + u32 header_info; } __packed; struct guc_doorbell_info { diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 6571d96704ad..76d3eb1e4614 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -21,8 +21,11 @@ * IN THE SOFTWARE. * */ + #include <linux/debugfs.h> #include <linux/relay.h> + +#include "intel_guc_log.h" #include "i915_drv.h" static void guc_log_capture_logs(struct intel_guc *guc); @@ -525,7 +528,8 @@ int intel_guc_log_create(struct intel_guc *guc) { struct i915_vma *vma; unsigned long offset; - uint32_t size, flags; + u32 flags; + u32 size; int ret; GEM_BUG_ON(guc->log.vma); diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h new file mode 100644 index 000000000000..f512cf79339b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_LOG_H_ +#define _INTEL_GUC_LOG_H_ + +#include <linux/workqueue.h> + +#include "intel_guc_fwif.h" + +struct drm_i915_private; +struct intel_guc; + +struct intel_guc_log { + u32 flags; + struct i915_vma *vma; + /* The runtime stuff gets created only when GuC logging gets enabled */ + struct { + void *buf_addr; + struct workqueue_struct *flush_wq; + struct work_struct flush_work; + struct rchan *relay_chan; + } runtime; + /* logging related stats */ + u32 capture_miss_count; + u32 flush_interrupt_count; + u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 flush_count[GUC_MAX_LOG_BUFFER]; +}; + +int intel_guc_log_create(struct intel_guc *guc); +void intel_guc_log_destroy(struct intel_guc *guc); +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); +void i915_guc_log_register(struct drm_i915_private *dev_priv); +void i915_guc_log_unregister(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e6f8f30ce7bd..5132dc814788 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -70,7 +70,7 @@ static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector) return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base); } -static u32 g4x_infoframe_index(enum hdmi_infoframe_type type) +static u32 g4x_infoframe_index(unsigned int type) { switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -85,7 +85,7 @@ static u32 g4x_infoframe_index(enum hdmi_infoframe_type type) } } -static u32 g4x_infoframe_enable(enum hdmi_infoframe_type type) +static u32 g4x_infoframe_enable(unsigned int type) { switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -100,9 +100,11 @@ static u32 g4x_infoframe_enable(enum hdmi_infoframe_type type) } } -static u32 hsw_infoframe_enable(enum hdmi_infoframe_type type) +static u32 hsw_infoframe_enable(unsigned int type) { switch (type) { + case DP_SDP_VSC: + return VIDEO_DIP_ENABLE_VSC_HSW; case HDMI_INFOFRAME_TYPE_AVI: return VIDEO_DIP_ENABLE_AVI_HSW; case HDMI_INFOFRAME_TYPE_SPD: @@ -118,10 +120,12 @@ static u32 hsw_infoframe_enable(enum hdmi_infoframe_type type) static i915_reg_t hsw_dip_data_reg(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder, - enum hdmi_infoframe_type type, + unsigned int type, int i) { switch (type) { + case DP_SDP_VSC: + return HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, i); case HDMI_INFOFRAME_TYPE_AVI: return HSW_TVIDEO_DIP_AVI_DATA(cpu_transcoder, i); case HDMI_INFOFRAME_TYPE_SPD: @@ -136,7 +140,7 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv, static void g4x_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -191,7 +195,7 @@ static bool g4x_infoframe_enabled(struct drm_encoder *encoder, static void ibx_write_infoframe(struct drm_encoder *encoder, const struct 
intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -251,7 +255,7 @@ static bool ibx_infoframe_enabled(struct drm_encoder *encoder, static void cpt_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -309,7 +313,7 @@ static bool cpt_infoframe_enabled(struct drm_encoder *encoder, static void vlv_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -368,7 +372,7 @@ static bool vlv_infoframe_enabled(struct drm_encoder *encoder, static void hsw_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -377,6 +381,8 @@ static void hsw_write_infoframe(struct drm_encoder *encoder, enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder); i915_reg_t data_reg; + int data_size = type == DP_SDP_VSC ? + VIDEO_DIP_VSC_DATA_SIZE : VIDEO_DIP_DATA_SIZE; int i; u32 val = I915_READ(ctl_reg); @@ -392,7 +398,7 @@ static void hsw_write_infoframe(struct drm_encoder *encoder, data++; } /* Write every possible data byte to force correct ECC calculation. */ - for (; i < VIDEO_DIP_DATA_SIZE; i += 4) + for (; i < data_size; i += 4) I915_WRITE(hsw_dip_data_reg(dev_priv, cpu_transcoder, type, i >> 2), 0); mmiowb(); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 8b4b53525422..c8a48cbc2b7d 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -21,9 +21,11 @@ * IN THE SOFTWARE. 
* */ -#include <linux/firmware.h> + +#include <linux/types.h> + +#include "intel_huc.h" #include "i915_drv.h" -#include "intel_uc.h" /** * DOC: HuC Firmware @@ -76,6 +78,42 @@ MODULE_FIRMWARE(I915_KBL_HUC_UCODE); GLK_HUC_FW_MINOR, GLK_BLD_NUM) /** + * intel_huc_select_fw() - selects HuC firmware for loading + * @huc: intel_huc struct + */ +void intel_huc_select_fw(struct intel_huc *huc) +{ + struct drm_i915_private *dev_priv = huc_to_i915(huc); + + intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); + + if (i915_modparams.huc_firmware_path) { + huc->fw.path = i915_modparams.huc_firmware_path; + huc->fw.major_ver_wanted = 0; + huc->fw.minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + huc->fw.path = I915_SKL_HUC_UCODE; + huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + huc->fw.path = I915_BXT_HUC_UCODE; + huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + huc->fw.path = I915_KBL_HUC_UCODE; + huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + huc->fw.path = I915_GLK_HUC_UCODE; + huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; + } else { + DRM_ERROR("No HuC firmware known for platform with HuC!\n"); + return; + } +} + +/** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device * @@ -83,26 +121,15 @@ MODULE_FIRMWARE(I915_KBL_HUC_UCODE); * * Return: 0 on success, non-zero on failure */ -static int huc_ucode_xfer(struct drm_i915_private *dev_priv) +static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) { - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; - struct i915_vma *vma; + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); unsigned long offset = 0; u32 size; int ret; - ret = i915_gem_object_set_to_gtt_domain(huc_fw->obj, false); - if (ret) { - DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); - return ret; - } - - vma = i915_gem_object_ggtt_pin(huc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (IS_ERR(vma)) { - DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); - return PTR_ERR(vma); - } + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -133,55 +160,10 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. 
- */ - i915_vma_unpin(vma); - return ret; } /** - * intel_huc_select_fw() - selects HuC firmware for loading - * @huc: intel_huc struct - */ -void intel_huc_select_fw(struct intel_huc *huc) -{ - struct drm_i915_private *dev_priv = huc_to_i915(huc); - - huc->fw.path = NULL; - huc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.type = INTEL_UC_FW_TYPE_HUC; - - if (i915_modparams.huc_firmware_path) { - huc->fw.path = i915_modparams.huc_firmware_path; - huc->fw.major_ver_wanted = 0; - huc->fw.minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - huc->fw.path = I915_SKL_HUC_UCODE; - huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - huc->fw.path = I915_BXT_HUC_UCODE; - huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc->fw.path = I915_KBL_HUC_UCODE; - huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - huc->fw.path = I915_GLK_HUC_UCODE; - huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; - } else { - DRM_ERROR("No HuC firmware known for platform with HuC!\n"); - return; - } -} - -/** * intel_huc_init_hw() - load HuC uCode to device * @huc: intel_huc structure * @@ -195,33 +177,7 @@ void intel_huc_select_fw(struct intel_huc *huc) */ void intel_huc_init_hw(struct intel_huc *huc) { - struct drm_i915_private *dev_priv = huc_to_i915(huc); - int err; - - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - intel_uc_fw_status_repr(huc->fw.load_status)); - - if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return; - - huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - - err = huc_ucode_xfer(dev_priv); - - huc->fw.load_status = err ? - INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS; - - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - intel_uc_fw_status_repr(huc->fw.load_status)); - - if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); - - return; + intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); } /** diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h new file mode 100644 index 000000000000..aaa38b9e5817 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_HUC_H_ +#define _INTEL_HUC_H_ + +#include "intel_uc_fw.h" + +struct intel_huc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; + + /* HuC-specific additions */ +}; + +void intel_huc_select_fw(struct intel_huc *huc); +void intel_huc_init_hw(struct intel_huc *huc); +void intel_huc_auth(struct intel_huc *huc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 61cac26a8b05..d36e25607435 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -208,8 +208,9 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ - #define WA_TAIL_DWORDS 2 +#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) +#define PREEMPT_ID 0x1 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -243,8 +244,7 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && - USES_PPGTT(dev_priv) && - i915_modparams.use_mmio_flip >= 0) + USES_PPGTT(dev_priv)) return 1; return 0; @@ -348,6 +348,43 @@ find_priolist: return ptr_pack_bits(p, first, 1); } +static void unwind_wa_tail(struct drm_i915_gem_request *rq) +{ + rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); + assert_ring_tail_valid(rq->ring, rq->tail); +} + +static void unwind_incomplete_requests(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *rq, *rn; + struct i915_priolist *uninitialized_var(p); + int last_prio = I915_PRIORITY_INVALID; + + lockdep_assert_held(&engine->timeline->lock); + + list_for_each_entry_safe_reverse(rq, rn, + &engine->timeline->requests, + link) { + if (i915_gem_request_completed(rq)) + return; + + __i915_gem_request_unsubmit(rq); + unwind_wa_tail(rq); + + GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); + if (rq->priotree.priority != last_prio) { + p = lookup_priolist(engine, + &rq->priotree, + rq->priotree.priority); + p = ptr_mask_bits(p, 1); + + last_prio = rq->priotree.priority; + } + + list_add(&rq->priotree.link, &p->requests); + } +} + static inline void execlists_context_status_change(struct drm_i915_gem_request *rq, unsigned long status) @@ -392,6 +429,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) return ce->lrc_desc; } +static inline void elsp_write(u64 desc, u32 __iomem *elsp) +{ + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlists.port; @@ -417,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = 0; } - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + elsp_write(desc, elsp); } } @@ -451,26 +493,43 @@ static void port_assign(struct execlist_port *port, port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); } +static void inject_preempt_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce = + &engine->i915->preempt_context->engine[engine->id]; + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; + + 
GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); + GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); + + memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); + ce->ring->tail += WA_TAIL_BYTES; + ce->ring->tail &= (ce->ring->size - 1); + ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + + for (n = execlists_num_ports(&engine->execlists); --n; ) + elsp_write(0, elsp); + + elsp_write(ce->lrc_desc, elsp); +} + +static bool can_preempt(struct intel_engine_cs *engine) +{ + return INTEL_INFO(engine->i915)->has_logical_ring_preemption; +} + static void execlists_dequeue(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *last; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; + struct drm_i915_gem_request *last = port_request(port); struct rb_node *rb; bool submit = false; - last = port_request(port); - if (last) - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL - * as we resubmit the request. See gen8_emit_breadcrumb() - * for where we prepare the padding after the end of the - * request. - */ - last->tail = last->wa_tail; - /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -495,7 +554,66 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - while (rb) { + if (!rb) + goto unlock; + + if (last) { + /* + * Don't resubmit or switch until all outstanding + * preemptions (lite-restore) are seen. Then we + * know the next preemption status we see corresponds + * to this ELSP update. + */ + if (port_count(&port[0]) > 1) + goto unlock; + + if (can_preempt(engine) && + rb_entry(rb, struct i915_priolist, node)->priority > + max(last->priotree.priority, 0)) { + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). + */ + inject_preempt_context(engine); + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + goto unlock; + } else { + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). + */ + if (port_count(&port[1])) + goto unlock; + + /* WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == req:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. 
+ */ + last->tail = last->wa_tail; + } + } + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; @@ -547,8 +665,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; - __i915_gem_request_submit(rq); trace_i915_gem_request_in(rq, port_index(port, execlists)); last = rq; @@ -560,27 +676,31 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: execlists->first = rb; if (submit) port_assign(port, last); +unlock: spin_unlock_irq(&engine->timeline->lock); - if (submit) + if (submit) { + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); execlists_submit_ports(engine); + } } static void execlist_cancel_port_requests(struct intel_engine_execlists *execlists) { struct execlist_port *port = execlists->port; - unsigned int num_ports = ARRAY_SIZE(execlists->port); + unsigned int num_ports = execlists_num_ports(execlists); while (num_ports-- && port_isset(port)) { struct drm_i915_gem_request *rq = port_request(port); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + GEM_BUG_ON(!execlists->active); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_gem_request_put(rq); memset(port, 0, sizeof(*port)); @@ -614,7 +734,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; dma_fence_set_error(&rq->fence, -EIO); __i915_gem_request_submit(rq); @@ -645,13 +764,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static bool execlists_elsp_ready(const struct intel_engine_cs *engine) -{ - const struct execlist_port *port = engine->execlists.port; - - return port_count(&port[0]) + port_count(&port[1]) < 2; -} - /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. 
@@ -660,7 +772,7 @@ static void intel_lrc_irq_handler(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; + struct execlist_port * const port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; /* We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -684,7 +796,6 @@ static void intel_lrc_irq_handler(unsigned long data) &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; - /* However GVT emulation depends upon intercepting CSB mmio */ if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); @@ -745,6 +856,29 @@ static void intel_lrc_irq_handler(unsigned long data) if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && + buf[2*head + 1] == PREEMPT_ID) { + execlist_cancel_port_requests(execlists); + + spin_lock_irq(&engine->timeline->lock); + unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->timeline->lock); + + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)); + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + continue; + } + + if (status & GEN8_CTX_STATUS_PREEMPTED && + execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)) + continue; + + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -766,6 +900,9 @@ static void intel_lrc_irq_handler(unsigned long data) /* After the final element, the hw should be idle */ GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + if (port_count(port) == 0) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_USER); } if (head != execlists->csb_head) { @@ -775,7 +912,7 @@ static void intel_lrc_irq_handler(unsigned long data) } } - if (execlists_elsp_ready(engine)) + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); @@ -788,7 +925,7 @@ static void insert_request(struct intel_engine_cs *engine, struct i915_priolist *p = lookup_priolist(engine, pt, prio); list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine)) + if (ptr_unmask_bits(p, 1)) tasklet_hi_schedule(&engine->execlists.irq_tasklet); } @@ -808,11 +945,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +{ + return container_of(pt, struct drm_i915_gem_request, priotree); +} + static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = - container_of(pt, struct drm_i915_gem_request, priotree)->engine; + struct intel_engine_cs *engine = pt_to_request(pt)->engine; GEM_BUG_ON(!locked); @@ -831,6 +972,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) struct i915_dependency stack; LIST_HEAD(dfs); + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + if (prio <= READ_ONCE(request->priotree.priority)) return; @@ -866,6 +1009,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * 
engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { + if (i915_gem_request_completed(pt_to_request(p->signaler))) + continue; + GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); @@ -879,7 +1025,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == INT_MIN) { + if (request->priotree.priority == I915_PRIORITY_INVALID) { GEM_BUG_ON(!list_empty(&request->priotree.link)); request->priotree.priority = prio; if (stack.dfs_link.next == stack.dfs_link.prev) @@ -909,8 +1055,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) } spin_unlock_irq(&engine->timeline->lock); - - /* XXX Do we need to preempt to make room for us and our deps? */ } static struct intel_ring * @@ -960,6 +1104,7 @@ execlists_context_pin(struct intel_engine_cs *engine, i915_ggtt_offset(ce->ring->vma); ce->state->obj->mm.dirty = true; + ce->state->obj->pin_global++; i915_gem_context_get(ctx); out: @@ -987,6 +1132,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, intel_ring_unpin(ce->ring); + ce->state->obj->pin_global--; i915_gem_object_unpin_map(ce->state->obj); i915_vma_unpin(ce->state); @@ -1106,6 +1252,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1119,26 +1267,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -/* - * This batch is started immediately after indirect_ctx batch. Since we ensure - * that indirect_ctx ends on a cacheline this batch is aligned automatically. - * - * The number of DWORDS written are returned using this field. - * - * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding - * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. 
- */ -static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - /* WaDisableCtxRestoreArbitration:bdw,chv */ - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *batch++ = MI_BATCH_BUFFER_END; - - return batch; -} - static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); @@ -1184,6 +1316,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) *batch++ = 0; } + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1251,7 +1385,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) break; case 8: wa_bb_fn[0] = gen8_init_indirectctx_bb; - wa_bb_fn[1] = gen8_init_perctx_bb; + wa_bb_fn[1] = NULL; break; default: MISSING_CASE(INTEL_GEN(engine->i915)); @@ -1337,6 +1471,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; + execlists->active = 0; /* After a GPU reset, we may have requests to replay */ if (!i915_modparams.enable_guc_submission && execlists->first) @@ -1382,7 +1517,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_gem_request *rq, *rn; struct intel_context *ce; unsigned long flags; @@ -1400,21 +1534,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, execlist_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, link) { - struct i915_priolist *p; - - if (i915_gem_request_completed(rq)) - break; - - __i915_gem_request_unsubmit(rq); - - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - list_add(&rq->priotree.link, - &ptr_mask_bits(p, 1)->requests); - } + unwind_incomplete_requests(engine); spin_unlock_irqrestore(&engine->timeline->lock, flags); @@ -1451,10 +1571,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Reset WaIdleLiteRestore:bdw,skl as well */ - request->tail = - intel_ring_wrap(request->ring, - request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); - assert_ring_tail_valid(request->ring, request->tail); + unwind_wa_tail(request); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1513,13 +1630,31 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, if (IS_ERR(cs)) return PTR_ERR(cs); + /* + * WaDisableCtxRestoreArbitration:bdw,chv + * + * We don't need to perform MI_ARB_ENABLE as often as we do (in + * particular all the gen that do not need the w/a at all!), if we + * took care to make sure that on every switch into this context + * (both ordinary and for preemption) that arbitrartion was enabled + * we would be fine. However, there doesn't seem to be a downside to + * being paranoid and making sure it is set before each batch and + * every context-switch. + * + * Note that if we fail to enable arbitration before the request + * is complete, then we do not see the context-switch interrupt and + * the engine hangs (with RING_HEAD == RING_TAIL). 
+ * + * That satisfies both the GPGPU w/a and our heavy-handed paranoia. + */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* FIXME(BDW): Address space and security selectors. */ *cs++ = MI_BATCH_BUFFER_START_GEN8 | (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = MI_NOOP; intel_ring_advance(req, cs); return 0; @@ -1648,7 +1783,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, */ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *cs++ = MI_NOOP; + /* Ensure there's always at least one preemption point per-request. */ + *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); } @@ -1669,7 +1805,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, @@ -1697,7 +1832,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 314adee7127a..689fde1a63a9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -61,6 +61,7 @@ enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, + INTEL_CONTEXT_SCHEDULE_PREEMPTED, }; /* Logical Rings */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index beb9baaf2f2e..dcbc786479f9 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -56,7 +56,7 @@ static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon) struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc; if (drm_lspcon_get_mode(adapter, ¤t_mode)) { - DRM_ERROR("Error reading LSPCON mode\n"); + DRM_DEBUG_KMS("Error reading LSPCON mode\n"); return DRM_LSPCON_MODE_INVALID; } return current_mode; @@ -68,16 +68,15 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, enum drm_lspcon_mode current_mode; current_mode = lspcon_get_current_mode(lspcon); - if (current_mode == mode || current_mode == DRM_LSPCON_MODE_INVALID) + if (current_mode == mode) goto out; DRM_DEBUG_KMS("Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); - wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode || - current_mode == DRM_LSPCON_MODE_INVALID, 100); + wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 100); if (current_mode != mode) - DRM_DEBUG_KMS("LSPCON mode hasn't settled\n"); + DRM_ERROR("LSPCON mode hasn't settled\n"); out: DRM_DEBUG_KMS("Current LSPCON mode %s\n", @@ -133,6 +132,7 @@ static bool lspcon_wake_native_aux_ch(struct intel_lspcon *lspcon) static bool lspcon_probe(struct intel_lspcon *lspcon) { + int retry; enum drm_dp_dual_mode_type adaptor_type; struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc; enum drm_lspcon_mode expected_mode; @@ -141,10 +141,18 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) DRM_LSPCON_MODE_PCON : DRM_LSPCON_MODE_LS; /* Lets probe the adaptor and check its type */ - adaptor_type = drm_dp_dual_mode_detect(adapter); + for (retry = 0; retry < 6; retry++) { 
+ if (retry) + usleep_range(500, 1000); + + adaptor_type = drm_dp_dual_mode_detect(adapter); + if (adaptor_type == DRM_DP_DUAL_MODE_LSPCON) + break; + } + if (adaptor_type != DRM_DP_DUAL_MODE_LSPCON) { DRM_DEBUG_KMS("No LSPCON detected, found %s\n", - drm_dp_get_dual_mode_type_name(adaptor_type)); + drm_dp_get_dual_mode_type_name(adaptor_type)); return false; } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index a55954a89148..38572d65e46e 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -939,10 +939,8 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) struct drm_display_mode *fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; struct edid *edid; - struct intel_crtc *crtc; i915_reg_t lvds_reg; u32 lvds; - int pipe; u8 pin; u32 allowed_scalers; @@ -1113,22 +1111,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) * on. If so, assume that whatever is currently programmed is the * correct mode. */ - - /* Ironlake: FIXME if still fail, not try pipe mode now */ - if (HAS_PCH_SPLIT(dev_priv)) - goto failed; - - pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - if (crtc && (lvds & LVDS_PORT_EN)) { - fixed_mode = intel_crtc_mode_get(dev, &crtc->base); - if (fixed_mode) { - DRM_DEBUG_KMS("using current (BIOS) mode: "); - drm_mode_debug_printmodeline(fixed_mode); - fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; - goto out; - } + fixed_mode = intel_encoder_current_mode(intel_encoder); + if (fixed_mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(fixed_mode); + fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; } /* If we still don't have a mode after all that, give up. */ diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 96043a51c1bf..899839f2f7c6 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -206,11 +206,11 @@ static const char *pipe_crc_source_name(enum intel_pipe_crc_source source) static int display_crc_ctl_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - seq_printf(m, "%c %s\n", pipe_name(i), - pipe_crc_source_name(dev_priv->pipe_crc[i].source)); + for_each_pipe(dev_priv, pipe) + seq_printf(m, "%c %s\n", pipe_name(pipe), + pipe_crc_source_name(dev_priv->pipe_crc[pipe].source)); return 0; } @@ -775,11 +775,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) return -EINVAL; } -static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe) +static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, + const char *buf, enum pipe *pipe) { const char name = buf[0]; - if (name < 'A' || name >= pipe_name(I915_MAX_PIPES)) + if (name < 'A' || name >= pipe_name(INTEL_INFO(dev_priv)->num_pipes)) return -EINVAL; *pipe = name - 'A'; @@ -828,7 +829,7 @@ static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, return -EINVAL; } - if (display_crc_ctl_parse_pipe(words[1], &pipe) < 0) { + if (display_crc_ctl_parse_pipe(dev_priv, words[1], &pipe) < 0) { DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c66af09e27a7..f4a4e9496893 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -322,7 +322,7 @@ static void chv_set_memory_dvfs(struct 
drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); if (enable) @@ -337,14 +337,14 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if (enable) @@ -353,7 +353,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) val &= ~DSP_MAXFIFO_PM5_ENABLE; vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } #define FW_WM(value, plane) \ @@ -2721,9 +2721,9 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, const struct intel_crtc *intel_crtc, int level, struct intel_crtc_state *cstate, - struct intel_plane_state *pristate, - struct intel_plane_state *sprstate, - struct intel_plane_state *curstate, + const struct intel_plane_state *pristate, + const struct intel_plane_state *sprstate, + const struct intel_plane_state *curstate, struct intel_wm_level *result) { uint16_t pri_latency = dev_priv->wm.pri_latency[level]; @@ -2790,11 +2790,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the first set of memory latencies[0:3] */ val = 0; /* data0 to be programmed to 0 for first set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); @@ -2811,11 +2811,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); return; @@ -3043,28 +3043,24 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) struct intel_pipe_wm *pipe_wm; struct drm_device *dev = state->dev; const struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane; - struct intel_plane_state *pristate = NULL; - struct intel_plane_state *sprstate = NULL; - struct intel_plane_state *curstate = NULL; + struct drm_plane *plane; + const struct drm_plane_state *plane_state; + const struct intel_plane_state *pristate = NULL; + const struct intel_plane_state *sprstate = NULL; + const struct intel_plane_state *curstate = NULL; int level, max_level = ilk_wm_max_level(dev_priv), usable_level; struct ilk_wm_maximums max; pipe_wm = &cstate->wm.ilk.optimal; - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { - struct intel_plane_state *ps; - - ps = intel_atomic_get_existing_plane_state(state, - intel_plane); - if (!ps) - continue; + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) { + const struct intel_plane_state *ps = to_intel_plane_state(plane_state); 
- if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY) + if (plane->type == DRM_PLANE_TYPE_PRIMARY) pristate = ps; - else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) + else if (plane->type == DRM_PLANE_TYPE_OVERLAY) sprstate = ps; - else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR) + else if (plane->type == DRM_PLANE_TYPE_CURSOR) curstate = ps; } @@ -3086,11 +3082,9 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) if (pipe_wm->sprites_scaled) usable_level = 0; - ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, - pristate, sprstate, curstate, &pipe_wm->raw_wm[0]); - memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm)); - pipe_wm->wm[0] = pipe_wm->raw_wm[0]; + ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, + pristate, sprstate, curstate, &pipe_wm->wm[0]); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) pipe_wm->linetime = hsw_compute_linetime_wm(cstate); @@ -3100,8 +3094,8 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) ilk_compute_wm_reg_maximums(dev_priv, 1, &max); - for (level = 1; level <= max_level; level++) { - struct intel_wm_level *wm = &pipe_wm->raw_wm[level]; + for (level = 1; level <= usable_level; level++) { + struct intel_wm_level *wm = &pipe_wm->wm[level]; ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, pristate, sprstate, curstate, wm); @@ -3111,13 +3105,10 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) * register maximums since such watermarks are * always invalid. */ - if (level > usable_level) - continue; - - if (ilk_validate_wm_level(level, &max, wm)) - pipe_wm->wm[level] = *wm; - else - usable_level = level; + if (!ilk_validate_wm_level(level, &max, wm)) { + memset(wm, 0, sizeof(*wm)); + break; + } } return 0; @@ -3133,7 +3124,11 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc_state *newstate) { struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; - struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(newstate->base.state); + const struct intel_crtc_state *oldstate = + intel_atomic_get_old_crtc_state(intel_state, intel_crtc); + const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; int level, max_level = ilk_wm_max_level(to_i915(dev)); /* @@ -3142,6 +3137,9 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * and after the vblank. 
*/ *a = newstate->wm.ilk.optimal; + if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base)) + return 0; + a->pipe_enabled |= b->pipe_enabled; a->sprites_enabled |= b->sprites_enabled; a->sprites_scaled |= b->sprites_scaled; @@ -3608,13 +3606,13 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Enabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); /* We don't need to wait for the SAGV when enabling */ - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -3645,14 +3643,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Disabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 1); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -4820,16 +4818,18 @@ static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, return a->start < b->end && b->start < a->end; } -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore) { - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - if (i != ignore && entries[i] && - skl_ddb_entries_overlap(ddb, entries[i])) + for_each_pipe(dev_priv, pipe) { + if (pipe != ignore && entries[pipe] && + skl_ddb_entries_overlap(ddb, entries[pipe])) return true; + } return false; } @@ -5619,7 +5619,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_PM2; if (IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if (val & DSP_MAXFIFO_PM5_ENABLE) @@ -5649,7 +5649,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_DDR_DVFS; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } for_each_intel_crtc(dev, crtc) { @@ -5753,12 +5753,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->wm.wm_mutex); } +/* + * FIXME should probably kill this and improve + * the real watermark readout/sanitation instead + */ +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) +{ + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); + + /* + * Don't touch WM1S_LP_EN here. + * Doing so could cause underruns. 
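The ilk_compute_intermediate_wm() hunk above now fetches the old CRTC state from the atomic state and skips the merge for inactive or full-modeset pipes; the point of the "intermediate" watermarks is that they must stay valid both before and after the vblank, so enables are OR-ed together and each level is typically taken as the worst case of the old and new values. Below is a minimal stand-alone sketch of that merge idea; the struct layout, field names and helper are simplified stand-ins, not the driver's types.

    #include <stdbool.h>

    #define MAX_WM_LEVELS 5

    struct wm_level {
    	unsigned int pri, spr, cur;	/* plane/sprite/cursor watermark values */
    	bool enable;
    };

    struct pipe_wm {
    	struct wm_level wm[MAX_WM_LEVELS];
    	bool pipe_enabled;
    };

    static unsigned int max_u(unsigned int a, unsigned int b)
    {
    	return a > b ? a : b;
    }

    /*
     * Intermediate watermarks must satisfy both the old and the new
     * configuration: take the worst case of each value, and only keep a
     * level enabled if it is valid in both states.
     */
    void merge_intermediate_wm(struct pipe_wm *out,
    			   const struct pipe_wm *old_wm,
    			   const struct pipe_wm *new_wm)
    {
    	int level;

    	out->pipe_enabled = old_wm->pipe_enabled || new_wm->pipe_enabled;

    	for (level = 0; level < MAX_WM_LEVELS; level++) {
    		out->wm[level].enable = old_wm->wm[level].enable &&
    					new_wm->wm[level].enable;
    		out->wm[level].pri = max_u(old_wm->wm[level].pri, new_wm->wm[level].pri);
    		out->wm[level].spr = max_u(old_wm->wm[level].spr, new_wm->wm[level].spr);
    		out->wm[level].cur = max_u(old_wm->wm[level].cur, new_wm->wm[level].cur);
    	}
    }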
+ */ +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values *hw = &dev_priv->wm.hw; struct drm_crtc *crtc; + ilk_init_lp_watermarks(dev_priv); + for_each_crtc(dev, crtc) ilk_pipe_wm_get_hw_state(crtc); @@ -5827,6 +5845,12 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv) { u32 val; + /* Display WA #0477 WaDisableIPC: skl */ + if (IS_SKYLAKE(dev_priv)) { + dev_priv->ipc_enabled = false; + return; + } + val = I915_READ(DISP_ARB_CTL2); if (dev_priv->ipc_enabled) @@ -5980,6 +6004,7 @@ static void ironlake_disable_drps(struct drm_i915_private *dev_priv) */ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 limits; /* Only set the down limit when we've reached the lowest level to avoid @@ -5989,13 +6014,13 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) * frequency, if the down threshold expires in that window we will not * receive a down interrupt. */ if (INTEL_GEN(dev_priv) >= 9) { - limits = (dev_priv->rps.max_freq_softlimit) << 23; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= (dev_priv->rps.min_freq_softlimit) << 14; + limits = (rps->max_freq_softlimit) << 23; + if (val <= rps->min_freq_softlimit) + limits |= (rps->min_freq_softlimit) << 14; } else { - limits = dev_priv->rps.max_freq_softlimit << 24; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= dev_priv->rps.min_freq_softlimit << 16; + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; } return limits; @@ -6003,39 +6028,40 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int new_power; u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; - new_power = dev_priv->rps.power; - switch (dev_priv->rps.power) { + new_power = rps->power; + switch (rps->power) { case LOW_POWER: - if (val > dev_priv->rps.efficient_freq + 1 && - val > dev_priv->rps.cur_freq) + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) new_power = BETWEEN; break; case BETWEEN: - if (val <= dev_priv->rps.efficient_freq && - val < dev_priv->rps.cur_freq) + if (val <= rps->efficient_freq && + val < rps->cur_freq) new_power = LOW_POWER; - else if (val >= dev_priv->rps.rp0_freq && - val > dev_priv->rps.cur_freq) + else if (val >= rps->rp0_freq && + val > rps->cur_freq) new_power = HIGH_POWER; break; case HIGH_POWER: - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && - val < dev_priv->rps.cur_freq) + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) new_power = BETWEEN; break; } /* Max/min bins are special */ - if (val <= dev_priv->rps.min_freq_softlimit) + if (val <= rps->min_freq_softlimit) new_power = LOW_POWER; - if (val >= dev_priv->rps.max_freq_softlimit) + if (val >= rps->max_freq_softlimit) new_power = HIGH_POWER; - if (new_power == dev_priv->rps.power) + if (new_power == rps->power) return; /* Note the units here are not exactly 1us, but 1280ns. 
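gen6_set_rps_thresholds(), shown above being converted to the new intel_rps structure, picks one of three power modes (LOW_POWER / BETWEEN / HIGH_POWER) for a requested frequency, with hysteresis around the efficient (RPe) and RP0 points and with the soft limits always forcing the extreme modes. A self-contained sketch of that selection logic, mirroring the switch in the hunk (names are illustrative; this is not the driver function):

    enum rps_power_mode { LOW_POWER, BETWEEN, HIGH_POWER };

    struct rps_state {
    	unsigned int cur_freq;
    	unsigned int efficient_freq;	/* RPe */
    	unsigned int rp0_freq;		/* RP0 */
    	unsigned int rp1_freq;		/* RP1 */
    	unsigned int min_freq_softlimit;
    	unsigned int max_freq_softlimit;
    	enum rps_power_mode power;
    };

    /* Decide the power mode to use for a requested frequency 'val'. */
    enum rps_power_mode pick_power_mode(const struct rps_state *rps, unsigned int val)
    {
    	enum rps_power_mode mode = rps->power;

    	switch (rps->power) {
    	case LOW_POWER:
    		if (val > rps->efficient_freq + 1 && val > rps->cur_freq)
    			mode = BETWEEN;
    		break;
    	case BETWEEN:
    		if (val <= rps->efficient_freq && val < rps->cur_freq)
    			mode = LOW_POWER;
    		else if (val >= rps->rp0_freq && val > rps->cur_freq)
    			mode = HIGH_POWER;
    		break;
    	case HIGH_POWER:
    		if (val < (rps->rp1_freq + rps->rp0_freq) / 2 && val < rps->cur_freq)
    			mode = BETWEEN;
    		break;
    	}

    	/* The softlimit extremes always force the corresponding mode. */
    	if (val <= rps->min_freq_softlimit)
    		mode = LOW_POWER;
    	if (val >= rps->max_freq_softlimit)
    		mode = HIGH_POWER;

    	return mode;
    }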
*/ @@ -6098,20 +6124,21 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_DOWN_IDLE_AVG); skip_hw_write: - dev_priv->rps.power = new_power; - dev_priv->rps.up_threshold = threshold_up; - dev_priv->rps.down_threshold = threshold_down; - dev_priv->rps.last_adj = 0; + rps->power = new_power; + rps->up_threshold = threshold_up; + rps->down_threshold = threshold_down; + rps->last_adj = 0; } static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 mask = 0; /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ - if (val > dev_priv->rps.min_freq_softlimit) + if (val > rps->min_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < dev_priv->rps.max_freq_softlimit) + if (val < rps->max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; mask &= dev_priv->pm_rps_events; @@ -6124,10 +6151,12 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* min/max delay may still have been modified so be sure to * write the limits value. */ - if (val != dev_priv->rps.cur_freq) { + if (val != rps->cur_freq) { gen6_set_rps_thresholds(dev_priv, val); if (INTEL_GEN(dev_priv) >= 9) @@ -6149,7 +6178,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - dev_priv->rps.cur_freq = val; + rps->cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6165,7 +6194,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - if (val != dev_priv->rps.cur_freq) { + if (val != dev_priv->gt_pm.rps.cur_freq) { err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); if (err) return err; @@ -6173,7 +6202,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) gen6_set_rps_thresholds(dev_priv, val); } - dev_priv->rps.cur_freq = val; + dev_priv->gt_pm.rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6188,10 +6217,11 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) */ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - u32 val = dev_priv->rps.idle_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 val = rps->idle_freq; int err; - if (dev_priv->rps.cur_freq <= val) + if (rps->cur_freq <= val) return; /* The punit delays the write of the frequency and voltage until it @@ -6216,34 +6246,38 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) void gen6_rps_busy(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + mutex_lock(&dev_priv->pcu_lock); + if (rps->enabled) { u8 freq; if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); + gen6_rps_pm_mask(dev_priv, rps->cur_freq)); gen6_enable_rps_interrupts(dev_priv); /* Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting 
frequency. */ - freq = max(dev_priv->rps.cur_freq, - dev_priv->rps.efficient_freq); + freq = max(rps->cur_freq, + rps->efficient_freq); if (intel_set_rps(dev_priv, clamp(freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit))) + rps->min_freq_softlimit, + rps->max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_idle(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* Flush our bottom-half so that it does not race with us * setting the idle frequency and so that it is bounded by * our rpm wakeref. And then disable the interrupts to stop any @@ -6251,36 +6285,36 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) */ gen6_disable_rps_interrupts(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { + mutex_lock(&dev_priv->pcu_lock); + if (rps->enabled) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); else - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); - dev_priv->rps.last_adj = 0; + gen6_set_rps(dev_priv, rps->idle_freq); + rps->last_adj = 0; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_boost(struct drm_i915_gem_request *rq, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { - struct drm_i915_private *i915 = rq->i915; + struct intel_rps *rps = &rq->i915->gt_pm.rps; unsigned long flags; bool boost; /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!i915->rps.enabled) + if (!rps->enabled) return; boost = false; spin_lock_irqsave(&rq->lock, flags); if (!rq->waitboost && !i915_gem_request_completed(rq)) { - atomic_inc(&i915->rps.num_waiters); + atomic_inc(&rps->num_waiters); rq->waitboost = true; boost = true; } @@ -6288,22 +6322,23 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq, if (!boost) return; - if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) - schedule_work(&i915->rps.work); + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); - atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); + atomic_inc(rps_client ? 
&rps_client->boosts : &rps->boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int err; - lockdep_assert_held(&dev_priv->rps.hw_lock); - GEM_BUG_ON(val > dev_priv->rps.max_freq); - GEM_BUG_ON(val < dev_priv->rps.min_freq); + lockdep_assert_held(&dev_priv->pcu_lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); - if (!dev_priv->rps.enabled) { - dev_priv->rps.cur_freq = val; + if (!rps->enabled) { + rps->cur_freq = val; return 0; } @@ -6326,21 +6361,30 @@ static void gen9_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rps(struct drm_i915_private *dev_priv) +static void gen6_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_i915_private *dev_priv) +{ I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + +static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) { - /* we're doing forcewake before Disabling RC6, + /* We're doing forcewake before Disabling RC6, * This what the BIOS expects when going into suspend */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6349,6 +6393,11 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } +static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -6471,24 +6520,26 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(dev_priv)) { u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; } /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + rps->max_freq = rps->rp0_freq; - dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; + rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { u32 ddcc_status = 0; @@ -6496,33 +6547,34 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) if 
(sandybridge_pcode_read(dev_priv, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, &ddcc_status) == 0) - dev_priv->rps.efficient_freq = + rps->efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), - dev_priv->rps.min_freq, - dev_priv->rps.max_freq); + rps->min_freq, + rps->max_freq); } if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ - dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; } } static void reset_rps(struct drm_i915_private *dev_priv, int (*set)(struct drm_i915_private *, u8)) { - u8 freq = dev_priv->rps.cur_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u8 freq = rps->cur_freq; /* force a reset */ - dev_priv->rps.power = -1; - dev_priv->rps.cur_freq = -1; + rps->power = -1; + rps->cur_freq = -1; if (set(dev_priv, freq)) DRM_ERROR("Failed to reset RPS to initial values\n"); @@ -6535,7 +6587,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); + GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); /* 1 second timeout*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, @@ -6555,7 +6607,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - uint32_t rc6_mask = 0; + u32 rc6_mode, rc6_mask = 0; /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); @@ -6589,12 +6641,19 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); + GEN6_RC_CTL_HW_ENABLE | rc6_mode | rc6_mask); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. @@ -6609,7 +6668,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen8_enable_rps(struct drm_i915_private *dev_priv) +static void gen8_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -6618,7 +6677,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); - /* 1c & 1d: Get forcewake during program sequence. Although the driver + /* 1b: Get forcewake during program sequence. 
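gen6_init_rps_frequencies() in the hunks above reads RP0/RP1/RPn as packed 8-bit fields out of a capability register, with GEN9-LP using the opposite field order from older parts, and then rescales the values by GEN9_FREQ_SCALER on SKL-class hardware so they are stored in 16.66 MHz units. A rough sketch of just the field extraction, using the shifts quoted in the hunk (the helper and struct names are made up for illustration):

    #include <stdint.h>

    struct rps_caps {
    	uint8_t rp0;	/* maximum non-overclocked frequency */
    	uint8_t rp1;	/* "efficient" frequency */
    	uint8_t rpn;	/* minimum frequency */
    };

    /*
     * GEN9-LP packs RP0 in bits 23:16 and RPn in bits 7:0; the older layout
     * is reversed.  RP1 sits in bits 15:8 in both cases.
     */
    struct rps_caps decode_rp_state_cap(uint32_t cap, int gen9_lp)
    {
    	struct rps_caps c;

    	if (gen9_lp) {
    		c.rp0 = (cap >> 16) & 0xff;
    		c.rp1 = (cap >> 8) & 0xff;
    		c.rpn = (cap >> 0) & 0xff;
    	} else {
    		c.rp0 = (cap >> 0) & 0xff;
    		c.rp1 = (cap >> 8) & 0xff;
    		c.rpn = (cap >> 16) & 0xff;
    	}
    	return c;
    }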
Although the driver * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6632,36 +6691,38 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; intel_print_rc6_info(dev_priv, rc6_mask); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - else - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - /* 4 Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + rc6_mask); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void gen8_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1 Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ /* Docs recommend 900MHz, and 300 MHz respectively */ I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); + rps->max_freq_softlimit << 24 | + rps->min_freq_softlimit << 16); I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ @@ -6670,7 +6731,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | GEN6_RP_MEDIA_HW_NORMAL_MODE | @@ -6679,14 +6740,12 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); - /* 6: Ring frequency + overclocking (our driver does this later */ - reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen6_enable_rps(struct drm_i915_private *dev_priv) +static void gen6_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -6695,14 +6754,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) int rc6_mode; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... 
- */ I915_WRITE(GEN6_RC_STATE, 0); /* Clear the DBG now so we don't confuse earlier errors */ @@ -6736,7 +6787,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ /* Check if we are enabling RC6 */ - rc6_mode = intel_enable_rc6(); + rc6_mode = intel_rc6_enabled(); if (rc6_mode & INTEL_RC6_ENABLE) rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; @@ -6756,12 +6807,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) GEN6_RC_CTL_EI_MODE(1) | GEN6_RC_CTL_HW_ENABLE); - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - reset_rps(dev_priv, gen6_set_rps); - rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); if (IS_GEN6(dev_priv) && ret) { @@ -6779,8 +6824,28 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } +static void gen6_enable_rps(struct drm_i915_private *dev_priv) +{ + /* Here begins a magic sequence of register writes to enable + * auto-downclocking. + * + * Perhaps there might be some value in exposing these to + * userspace... + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* Power down if completely idle for over 50ms */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + reset_rps(dev_priv, gen6_set_rps); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int min_freq = 15; unsigned int gpu_freq; unsigned int max_ia_freq, min_ring_freq; @@ -6788,7 +6853,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int scaling_factor = 180; struct cpufreq_policy *policy; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); policy = cpufreq_cpu_get(0); if (policy) { @@ -6811,11 +6876,11 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; - max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq; - max_gpu_freq = dev_priv->rps.max_freq; + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; } /* @@ -7066,17 +7131,18 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) { - dev_priv->rps.gpll_ref_freq = + dev_priv->gt_pm.rps.gpll_ref_freq = vlv_get_cck_clock(dev_priv, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, dev_priv->czclk_freq); DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", - dev_priv->rps.gpll_ref_freq); + dev_priv->gt_pm.rps.gpll_ref_freq); } static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; valleyview_setup_pctx(dev_priv); @@ -7098,30 +7164,31 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = valleyview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; 
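The ring-frequency code above works in hardware codepoints rather than MHz: per the comments in these hunks, most generations use 50 MHz units while SKL-class parts store values pre-multiplied by GEN9_FREQ_SCALER, i.e. in 16.66 MHz units. A small sketch of that unit conversion under exactly that assumption (this is not intel_gpu_freq(); VLV/CHV, which derive frequencies from the GPLL reference clock, are not covered):

    /*
     * Convert an RPS codepoint to MHz.  Pass scaler == 1 for parts that use
     * 50 MHz units, scaler == 3 for parts that store values in 16.66 MHz
     * units (50 MHz divided by the scaler).
     */
    unsigned int codepoint_to_mhz(unsigned int codepoint, unsigned int scaler)
    {
    	return codepoint * 50 / scaler;
    }

    unsigned int mhz_to_codepoint(unsigned int mhz, unsigned int scaler)
    {
    	return mhz * scaler / 50;
    }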
DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); + rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); + rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); + rps->min_freq = valleyview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; cherryview_setup_pctx(dev_priv); @@ -7142,31 +7209,29 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = cherryview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); + rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); + rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + rps->min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); - WARN_ONCE((dev_priv->rps.max_freq | - dev_priv->rps.efficient_freq | - dev_priv->rps.rp1_freq | - dev_priv->rps.min_freq) & 1, + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, "Odd GPU freq values\n"); } @@ -7175,13 +7240,11 @@ static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) valleyview_cleanup_pctx(dev_priv); } -static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0, pcbr; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u32 gtfifodbg, rc6_mode = 0, pcbr; gtfifodbg = 
I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | GT_FIFO_FREE_ENTRIES_CHV); @@ -7212,7 +7275,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - /* allows RC6 residency counter to work */ + /* Allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | VLV_MEDIA_RC6_COUNT_EN | @@ -7222,13 +7285,22 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) pcbr = I915_READ(VLV_PCBR); /* 3: Enable RC6 */ - if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && + if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) && (pcbr >> VLV_PCBR_ADDR_SHIFT)) rc6_mode = GEN7_RC_CTL_TO_MODE; I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - /* 4 Program defaults and thresholds for RPS*/ + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1: Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); @@ -7237,7 +7309,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | @@ -7264,13 +7336,11 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u32 gtfifodbg, rc6_mode = 0; valleyview_check_pctx(dev_priv); @@ -7281,28 +7351,11 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GTFIFODBG, gtfifodbg); } - /* If VLV, Forcewake all wells, else re-direct to regular path */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* Disable RC states. 
*/ I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); @@ -7312,7 +7365,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - /* allows RC6 residency counter to work */ + /* Allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | VLV_MEDIA_RC0_COUNT_EN | @@ -7320,13 +7373,38 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; intel_print_rc6_info(dev_priv, rc6_mode); I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + /* Setting Fixed Bias */ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | @@ -7534,7 +7612,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) lockdep_assert_held(&mchdev_lock); - pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); + pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; ext_v = pvid_to_extvid(dev_priv, pxvid); @@ -7821,6 +7899,8 @@ static void intel_init_emon(struct drm_i915_private *dev_priv) void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * RPM depends on RC6 to save restore the GT HW context, so make RC6 a * requirement. 
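The intel_init_gt_powersave() changes beginning above and continuing below derive the user-visible limits from the hardware capabilities: the idle and current frequency start at the minimum, the soft limits default to the full hardware range, some platforms raise the minimum soft limit to roughly the efficient frequency, and the boost frequency defaults to the (possibly overclocked) maximum. A condensed sketch of that initialization; the structure is a simplified stand-in for the new intel_rps fields, not the driver code:

    struct rps_limits {
    	unsigned int min_freq, max_freq;	/* hardware range */
    	unsigned int efficient_freq;		/* RPe */
    	unsigned int idle_freq, cur_freq;
    	unsigned int min_freq_softlimit, max_freq_softlimit;
    	unsigned int boost_freq;
    };

    void init_user_limits(struct rps_limits *r, int raise_min_to_rpe)
    {
    	/* Start idle and current frequency at the bottom of the range. */
    	r->idle_freq = r->min_freq;
    	r->cur_freq = r->idle_freq;

    	/* User soft limits default to the full hardware range. */
    	r->max_freq_softlimit = r->max_freq;
    	r->min_freq_softlimit = r->min_freq;

    	/* Some platforms keep the floor at the efficient frequency. */
    	if (raise_min_to_rpe && r->efficient_freq > r->min_freq_softlimit)
    		r->min_freq_softlimit = r->efficient_freq;

    	/* Allow boosting up to the (possibly overclocked) maximum. */
    	r->boost_freq = r->max_freq;
    }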
@@ -7831,7 +7911,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) } mutex_lock(&dev_priv->drm.struct_mutex); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) @@ -7842,16 +7922,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = + rps->min_freq_softlimit = max_t(int, - dev_priv->rps.efficient_freq, + rps->efficient_freq, intel_freq_opcode(dev_priv, 450)); /* After setting max-softlimit, find the overclock max freq */ @@ -7862,16 +7942,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, ¶ms); if (params & BIT(31)) { /* OC supported */ DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (dev_priv->rps.max_freq & 0xff) * 50, + (rps->max_freq & 0xff) * 50, (params & 0xff) * 50); - dev_priv->rps.max_freq = params & 0xff; + rps->max_freq = params & 0xff; } } /* Finally allow us to boost to max by default */ - dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + rps->boost_freq = rps->max_freq; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); mutex_unlock(&dev_priv->drm.struct_mutex); intel_autoenable_gt_powersave(dev_priv); @@ -7899,7 +7979,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work)) intel_runtime_pm_put(dev_priv); /* gen6_rps_idle() will be called later to disable interrupts */ @@ -7907,90 +7987,168 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { - dev_priv->rps.enabled = true; /* force disabling */ + dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ + dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); gen6_reset_rps_interrupts(dev_priv); } -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) +static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) { - if (!READ_ONCE(dev_priv->rps.enabled)) + lockdep_assert_held(&i915->pcu_lock); + + if (!i915->gt_pm.llc_pstate.enabled) return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Currently there is no HW configuration to be done to disable. 
*/ - if (INTEL_GEN(dev_priv) >= 9) { + i915->gt_pm.llc_pstate.enabled = false; +} + +static void intel_disable_rc6(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (!dev_priv->gt_pm.rc6.enabled) + return; + + if (INTEL_GEN(dev_priv) >= 9) gen9_disable_rc6(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = false; +} + +static void intel_disable_rps(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (!dev_priv->gt_pm.rps.enabled) + return; + + if (INTEL_GEN(dev_priv) >= 9) gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { + else if (IS_CHERRYVIEW(dev_priv)) cherryview_disable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { + else if (IS_VALLEYVIEW(dev_priv)) valleyview_disable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { + else if (INTEL_GEN(dev_priv) >= 6) gen6_disable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { + else if (IS_IRONLAKE_M(dev_priv)) ironlake_disable_drps(dev_priv); - } - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + dev_priv->gt_pm.rps.enabled = false; } -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - /* We shouldn't be disabling as we submit, so this should be less - * racy than it appears! - */ - if (READ_ONCE(dev_priv->rps.enabled)) + mutex_lock(&dev_priv->pcu_lock); + + intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_disable_llc_pstate(dev_priv); + + mutex_unlock(&dev_priv->pcu_lock); +} + +static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->pcu_lock); + + if (i915->gt_pm.llc_pstate.enabled) return; - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) + gen6_update_ring_freq(i915); + + i915->gt_pm.llc_pstate.enabled = true; +} + +static void intel_enable_rc6(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (dev_priv->gt_pm.rc6.enabled) return; - mutex_lock(&dev_priv->rps.hw_lock); + if (IS_CHERRYVIEW(dev_priv)) + cherryview_enable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 9) + gen9_enable_rc6(dev_priv); + else if (IS_BROADWELL(dev_priv)) + gen8_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_enable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = true; +} + +static void intel_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + lockdep_assert_held(&dev_priv->pcu_lock); + + if (rps->enabled) + return; if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); } else if (INTEL_GEN(dev_priv) >= 9) { - gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) - gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); } - 
WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); - WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); - dev_priv->rps.enabled = true; - mutex_unlock(&dev_priv->rps.hw_lock); + rps->enabled = true; +} + +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +{ + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; + + mutex_lock(&dev_priv->pcu_lock); + + intel_enable_rc6(dev_priv); + intel_enable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_enable_llc_pstate(dev_priv); + + mutex_unlock(&dev_priv->pcu_lock); } static void __intel_autoenable_gt_powersave(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + container_of(work, + typeof(*dev_priv), + gt_pm.autoenable_work.work); struct intel_engine_cs *rcs; struct drm_i915_gem_request *req; - if (READ_ONCE(dev_priv->rps.enabled)) - goto out; - rcs = dev_priv->engine[RCS]; if (rcs->last_retired_context) goto out; @@ -8018,9 +8176,6 @@ out: void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->rps.enabled)) - return; - if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); @@ -8038,7 +8193,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) * runtime resume it's necessary). */ if (queue_delayed_work(dev_priv->wq, - &dev_priv->rps.autoenable_work, + &dev_priv->gt_pm.autoenable_work, round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } @@ -8068,18 +8223,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) } } -static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) -{ - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); - - /* - * Don't touch WM1S_LP_EN here. - * Doing so could cause underruns. - */ -} - static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; @@ -8113,8 +8256,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); - ilk_init_lp_watermarks(dev_priv); - /* * Based on the document from hardware guys the following bits * should be set unconditionally in order to enable FBC. 
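The powersave rework above splits the old all-in-one rps.enabled handling into per-feature state (RC6, RPS and the LLC ring-frequency table), each guarded by the new dev_priv->pcu_lock and each idempotent on repeated enable/disable calls. Below is a condensed, self-contained sketch of that pattern; the structure and function names are simplified stand-ins for illustration, not the i915 code itself.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for dev_priv->gt_pm; not the real i915 layout. */
struct gt_pm {
	pthread_mutex_t pcu_lock;	/* plays the role of dev_priv->pcu_lock */
	bool rc6_enabled;
	bool rps_enabled;
	bool llc_pstate_enabled;
};

static void enable_rc6(struct gt_pm *pm)
{
	if (pm->rc6_enabled)		/* already on: enabling is idempotent */
		return;
	/* platform-specific RC6 programming would happen here */
	pm->rc6_enabled = true;
}

static void enable_rps(struct gt_pm *pm)
{
	if (pm->rps_enabled)
		return;
	/* platform-specific RPS/frequency programming would happen here */
	pm->rps_enabled = true;
}

static void enable_llc_pstate(struct gt_pm *pm)
{
	if (pm->llc_pstate_enabled)
		return;
	/* ring/IA frequency table update would happen here */
	pm->llc_pstate_enabled = true;
}

/* Same shape as intel_enable_gt_powersave(): one lock, three features. */
static void enable_gt_powersave(struct gt_pm *pm, bool has_llc)
{
	pthread_mutex_lock(&pm->pcu_lock);
	enable_rc6(pm);
	enable_rps(pm);
	if (has_llc)
		enable_llc_pstate(pm);
	pthread_mutex_unlock(&pm->pcu_lock);
}

int main(void)
{
	struct gt_pm pm = { .rc6_enabled = false };

	pthread_mutex_init(&pm.pcu_lock, NULL);
	enable_gt_powersave(&pm, true);
	enable_gt_powersave(&pm, true);	/* second call is a no-op */
	printf("rc6=%d rps=%d llc=%d\n",
	       pm.rc6_enabled, pm.rps_enabled, pm.llc_pstate_enabled);
	pthread_mutex_destroy(&pm.pcu_lock);
	return 0;
}

The same per-feature flags are what intel_sanitize_gt_powersave() forces to true, so that the subsequent disable path really runs on each feature.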
@@ -8227,8 +8368,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_GT_MODE, _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); @@ -8345,14 +8484,17 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, int high_prio_credits) { u32 misccpctl; + u32 val; /* WaTempDisableDOPClkGating:bdw */ misccpctl = I915_READ(GEN7_MISCCPCTL); I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GEN8_L3SQCREG1, - L3_GENERAL_PRIO_CREDITS(general_prio_credits) | - L3_HIGH_PRIO_CREDITS(high_prio_credits)); + val = I915_READ(GEN8_L3SQCREG1); + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits); + val |= L3_HIGH_PRIO_CREDITS(high_prio_credits); + I915_WRITE(GEN8_L3SQCREG1, val); /* * Wait at least 100 clocks before re-enabling clock gating. @@ -8447,10 +8589,11 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) { + /* The GTT cache must be disabled if the system is using 2M pages. */ + bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, + I915_GTT_PAGE_SIZE_2M); enum pipe pipe; - ilk_init_lp_watermarks(dev_priv); - /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -8481,12 +8624,8 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaProgramL3SqcReg1Default:bdw */ gen8_set_l3sqc_credits(dev_priv, 30, 2); - /* - * WaGttCachingOffByDefault:bdw - * GTT cache may not work with big pages, so if those - * are ever enabled GTT cache may need to be disabled. - */ - I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); + /* WaGttCachingOffByDefault:bdw */ + I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); /* WaKVMNotificationOnConfigChange:bdw */ I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) @@ -8505,8 +8644,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - ilk_init_lp_watermarks(dev_priv); - /* L3 caching of data atomics doesn't work -- disable it. 
*/ I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); I915_WRITE(HSW_ROW_CHICKEN3, @@ -8550,10 +8687,6 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); - /* WaRsPkgCStateDisplayPMReq:hsw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); - lpt_init_clock_gating(dev_priv); } @@ -8561,8 +8694,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t snpcr; - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaDisableEarlyCull:ivb */ @@ -9067,7 +9198,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -9114,7 +9245,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -9191,7 +9322,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 status; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ &status) @@ -9233,31 +9364,39 @@ out: static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val - 0xb7 * Slow = Fast = GPLL ref * N */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); } static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) { - return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; } static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val / 2 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); } static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* CHV needs even values */ - return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2; + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; } int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) @@ -9288,14 +9427,14 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->rps.hw_lock); + mutex_init(&dev_priv->pcu_lock); - INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work, __intel_autoenable_gt_powersave); - atomic_set(&dev_priv->rps.num_waiters, 0); + atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); - dev_priv->pm.suspended = false; - atomic_set(&dev_priv->pm.wakeref_count, 0); + 
dev_priv->runtime_pm.suspended = false; + atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, @@ -9348,7 +9487,7 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, { u64 time_hw, units, div; - if (!intel_enable_rc6()) + if (!intel_rc6_enabled()) return 0; intel_runtime_pm_get(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 5419cda83ba8..6e3b430fccdc 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -58,6 +58,9 @@ static bool is_edp_psr(struct intel_dp *intel_dp) { + if (!intel_dp_is_edp(intel_dp)) + return false; + return intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED; } @@ -72,37 +75,6 @@ static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) (val == VLV_EDP_PSR_ACTIVE_SF_UPDATE); } -static void intel_psr_write_vsc(struct intel_dp *intel_dp, - const struct edp_vsc_psr *vsc_psr) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; - i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder); - uint32_t *data = (uint32_t *) vsc_psr; - unsigned int i; - - /* As per BSPec (Pipe Video Data Island Packet), we need to disable - the video DIP being updated before program video DIP data buffer - registers for DIP being updated. */ - I915_WRITE(ctl_reg, 0); - POSTING_READ(ctl_reg); - - for (i = 0; i < sizeof(*vsc_psr); i += 4) { - I915_WRITE(HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, - i >> 2), *data); - data++; - } - for (; i < VIDEO_DIP_VSC_DATA_SIZE; i += 4) - I915_WRITE(HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, - i >> 2), 0); - - I915_WRITE(ctl_reg, VIDEO_DIP_ENABLE_VSC_HSW); - POSTING_READ(ctl_reg); -} - static void vlv_psr_setup_vsc(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -149,7 +121,8 @@ static void hsw_psr_setup_vsc(struct intel_dp *intel_dp, psr_vsc.sdp_header.HB3 = 0x8; } - intel_psr_write_vsc(intel_dp, &psr_vsc); + intel_dig_port->write_infoframe(&intel_dig_port->base.base, crtc_state, + DP_SDP_VSC, &psr_vsc, sizeof(psr_vsc)); } static void vlv_psr_enable_sink(struct intel_dp *intel_dp) @@ -376,22 +349,25 @@ static void hsw_psr_activate(struct intel_dp *intel_dp) hsw_activate_psr1(intel_dp); } -static bool intel_psr_match_conditions(struct intel_dp *intel_dp) +void intel_psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc = dig_port->base.base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); const struct drm_display_mode *adjusted_mode = - &intel_crtc->config->base.adjusted_mode; + &crtc_state->base.adjusted_mode; int psr_setup_time; - lockdep_assert_held(&dev_priv->psr.lock); - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); + if (!HAS_PSR(dev_priv)) + return; + + if (!is_edp_psr(intel_dp)) + return; - dev_priv->psr.source_ok = false; + if (!i915_modparams.enable_psr) { + DRM_DEBUG_KMS("PSR disable by flag\n"); + return; + } /* * HSW spec explicitly says PSR 
is tied to port A. @@ -402,66 +378,70 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp) */ if (HAS_DDI(dev_priv) && dig_port->port != PORT_A) { DRM_DEBUG_KMS("PSR condition failed: Port not supported\n"); - return false; - } - - if (!i915_modparams.enable_psr) { - DRM_DEBUG_KMS("PSR disable by flag\n"); - return false; + return; } if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && !dev_priv->psr.link_standby) { DRM_ERROR("PSR condition failed: Link off requested but not supported on this platform\n"); - return false; + return; } if (IS_HASWELL(dev_priv) && - I915_READ(HSW_STEREO_3D_CTL(intel_crtc->config->cpu_transcoder)) & + I915_READ(HSW_STEREO_3D_CTL(crtc_state->cpu_transcoder)) & S3D_ENABLE) { DRM_DEBUG_KMS("PSR condition failed: Stereo 3D is Enabled\n"); - return false; + return; } if (IS_HASWELL(dev_priv) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { DRM_DEBUG_KMS("PSR condition failed: Interlaced is Enabled\n"); - return false; + return; } psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd); if (psr_setup_time < 0) { DRM_DEBUG_KMS("PSR condition failed: Invalid PSR setup time (0x%02x)\n", intel_dp->psr_dpcd[1]); - return false; + return; } if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) > adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) { DRM_DEBUG_KMS("PSR condition failed: PSR setup time (%d us) too long\n", psr_setup_time); - return false; + return; + } + + /* + * FIXME psr2_support is messed up. It's both computed + * dynamically during PSR enable, and extracted from sink + * caps during eDP detection. + */ + if (!dev_priv->psr.psr2_support) { + crtc_state->has_psr = true; + return; } /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (dev_priv->psr.psr2_support && - (intel_crtc->config->pipe_src_w > 3200 || - intel_crtc->config->pipe_src_h > 2000)) { - dev_priv->psr.psr2_support = false; - return false; + if (adjusted_mode->crtc_hdisplay > 3200 || + adjusted_mode->crtc_vdisplay > 2000) { + DRM_DEBUG_KMS("PSR2 disabled, panel resolution too big\n"); + return; } /* * FIXME:enable psr2 only for y-cordinate psr2 panels * After gtc implementation , remove this restriction. 
*/ - if (!dev_priv->psr.y_cord_support && dev_priv->psr.psr2_support) { + if (!dev_priv->psr.y_cord_support) { DRM_DEBUG_KMS("PSR2 disabled, panel does not support Y coordinate\n"); - return false; + return; } - dev_priv->psr.source_ok = true; - return true; + crtc_state->has_psr = true; + crtc_state->has_psr2 = true; } static void intel_psr_activate(struct intel_dp *intel_dp) @@ -531,13 +511,8 @@ void intel_psr_enable(struct intel_dp *intel_dp, struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!HAS_PSR(dev_priv)) - return; - - if (!is_edp_psr(intel_dp)) { - DRM_DEBUG_KMS("PSR not supported by this panel\n"); + if (!crtc_state->has_psr) return; - } WARN_ON(dev_priv->drrs.dp); mutex_lock(&dev_priv->psr.lock); @@ -546,8 +521,8 @@ void intel_psr_enable(struct intel_dp *intel_dp, goto unlock; } - if (!intel_psr_match_conditions(intel_dp)) - goto unlock; + dev_priv->psr.psr2_support = crtc_state->has_psr2; + dev_priv->psr.source_ok = true; dev_priv->psr.busy_frontbuffer_bits = 0; @@ -668,7 +643,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!HAS_PSR(dev_priv)) + if (!old_crtc_state->has_psr) return; mutex_lock(&dev_priv->psr.lock); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 05c08b0bc172..8da1bde442dd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -484,11 +484,6 @@ static bool stop_ring(struct intel_engine_cs *engine) I915_WRITE_HEAD(engine, 0); I915_WRITE_TAIL(engine, 0); - if (INTEL_GEN(dev_priv) > 2) { - (void)I915_READ_CTL(engine); - I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); - } - return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; } @@ -570,6 +565,9 @@ static int init_ring_common(struct intel_engine_cs *engine) intel_engine_init_hangcheck(engine); + if (INTEL_GEN(dev_priv) > 2) + I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); + out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -579,7 +577,16 @@ out: static void reset_ring_common(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - /* Try to restore the logical GPU state to match the continuation + /* + * RC6 must be prevented until the reset is complete and the engine + * reinitialised. If it occurs in the middle of this sequence, the + * state written to/loaded from the power context is ill-defined (e.g. + * the PP_BASE_DIR may be lost). + */ + assert_forcewakes_active(engine->i915, FORCEWAKE_ALL); + + /* + * Try to restore the logical GPU state to match the continuation * of the request queue. 
If we skip the context/PD restore, then * the next request may try to execute assuming that its context * is valid and loaded on the GPU and so may try to access invalid @@ -1237,6 +1244,8 @@ int intel_ring_pin(struct intel_ring *ring, if (IS_ERR(addr)) goto err; + vma->obj->pin_global++; + ring->vaddr = addr; return 0; @@ -1268,6 +1277,7 @@ void intel_ring_unpin(struct intel_ring *ring) i915_gem_object_unpin_map(ring->vma->obj); ring->vaddr = NULL; + ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); } @@ -1432,6 +1442,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, goto err; ce->state->obj->mm.dirty = true; + ce->state->obj->pin_global++; } /* The kernel context is only used as a placeholder for flushing the @@ -1466,8 +1477,10 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, if (--ce->pin_count) return; - if (ce->state) + if (ce->state) { + ce->state->obj->pin_global--; i915_vma_unpin(ce->state); + } i915_gem_context_put(ctx); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 56d7ae9f298b..2863d5a65187 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_ @@ -7,6 +8,8 @@ #include "i915_gem_timeline.h" #include "i915_selftest.h" +struct drm_printer; + #define I915_CMD_HASH_ORDER 9 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, @@ -239,6 +242,19 @@ struct intel_engine_execlists { } port[EXECLIST_MAX_PORTS]; /** + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. 
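+ *
+ * As an illustration of how the helpers declared further down in this
+ * header are meant to be used: the submission path would call
+ * execlists_set_active(execlists, EXECLISTS_ACTIVE_USER) when the first
+ * context is written to the ELSP ports, and execlists_clear_active()
+ * once the final completion event has drained the ports.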
+ */ + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 + + /** * @port_mask: number of execlist ports - 1 */ unsigned int port_mask; @@ -518,6 +534,27 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { @@ -531,6 +568,7 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, const unsigned int m = execlists->port_mask; GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); memmove(port, port + 1, m * sizeof(struct execlist_port)); memset(port + m, 0, sizeof(struct execlist_port)); @@ -834,4 +872,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); +void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 7933d1bc6a1c..8af286c63d3b 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -187,7 +187,7 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well; bool is_enabled; - if (dev_priv->pm.suspended) + if (dev_priv->runtime_pm.suspended) return false; is_enabled = true; @@ -368,7 +368,7 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, { enum i915_power_well_id id = power_well->id; bool wait_fuses = power_well->hsw.has_fuses; - enum skl_power_gate pg; + enum skl_power_gate uninitialized_var(pg); u32 val; if (wait_fuses) { @@ -785,7 +785,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, state = enable ? 
PUNIT_PWRGT_PWR_ON(power_well_id) : PUNIT_PWRGT_PWR_GATE(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) @@ -806,7 +806,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void vlv_power_well_enable(struct drm_i915_private *dev_priv, @@ -833,7 +833,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, mask = PUNIT_PWRGT_MASK(power_well_id); ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* @@ -852,7 +852,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; WARN_ON(ctrl != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1364,7 +1364,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, bool enabled; u32 state, ctrl; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); /* @@ -1381,7 +1381,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); WARN_ON(ctrl << 16 != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1396,7 +1396,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) @@ -1417,7 +1417,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, @@ -2809,6 +2809,9 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume /* 6. 
Enable DBUF */ gen9_dbuf_enable(dev_priv); + + if (resume && dev_priv->csr.dmc_payload) + intel_csr_load_program(dev_priv); } static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) @@ -3125,7 +3128,7 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) ret = pm_runtime_get_sync(kdev); WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); } @@ -3159,7 +3162,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) return false; } - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); return true; @@ -3190,7 +3193,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) assert_rpm_wakelock_held(dev_priv); pm_runtime_get_noresume(kdev); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -3207,7 +3210,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) struct device *kdev = &pdev->dev; assert_rpm_wakelock_held(dev_priv); - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 7d971cb56116..75c872bb8cc9 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -81,7 +81,7 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -95,7 +95,7 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { int err; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -125,7 +125,7 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 28a1209d87e2..4fcf80ca91dd 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -66,7 +66,13 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } +/* FIXME: We should instead only take spinlocks once for the entire update + * instead of once per mmio. 
*/ +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +#define VBLANK_EVASION_TIME_US 250 +#else #define VBLANK_EVASION_TIME_US 100 +#endif /** * intel_pipe_update_start() - start update of a set of display registers @@ -224,7 +230,7 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) #endif } -static void +void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -305,7 +311,7 @@ skl_update_plane(struct intel_plane *plane, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void +void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 277477890240..25bd162f38d2 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -22,22 +22,9 @@ * */ -#include "i915_drv.h" #include "intel_uc.h" -#include <linux/firmware.h> - -/* Cleans up uC firmware by releasing the firmware GEM obj. - */ -static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_gem_object *obj; - - obj = fetch_and_zero(&uc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} +#include "i915_drv.h" +#include "i915_guc_submission.h" /* Reset GuC providing us with fresh state for both GuC and HuC. */ @@ -81,7 +68,7 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) if (HAS_HUC_UCODE(dev_priv)) intel_huc_select_fw(&dev_priv->huc); - if (intel_guc_select_fw(&dev_priv->guc)) + if (intel_guc_fw_select(&dev_priv->guc)) i915_modparams.enable_guc_loading = 0; } @@ -94,198 +81,34 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.enable_guc_submission = HAS_GUC_SCHED(dev_priv); } -static void gen8_guc_raise_irq(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); -} - void intel_uc_init_early(struct drm_i915_private *dev_priv) { - struct intel_guc *guc = &dev_priv->guc; - - intel_guc_ct_init_early(&guc->ct); - - mutex_init(&guc->send_mutex); - guc->send = intel_guc_send_nop; - guc->notify = gen8_guc_raise_irq; -} - -static void fetch_uc_fw(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; - - if (!uc_fw->path) - return; - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) - goto fail; - if (!fw) - goto fail; - - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", - uc_fw->path, fw); - - /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct uc_css_header)) { - DRM_NOTE("Firmware header is missing\n"); - goto fail; - } - - css = (struct uc_css_header *)fw->data; - - /* Firmware bits always start from header */ - uc_fw->header_offset = 0; - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - - if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_NOTE("CSS header definition mismatch\n"); - goto fail; - } - - /* then, uCode */ - 
uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; - uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); - - /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_NOTE("RSA key size is bad\n"); - goto fail; - } - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); - - /* At least, it should have header, uCode and RSA. Size of all three. */ - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; - if (fw->size < size) { - DRM_NOTE("Missing firmware components\n"); - goto fail; - } - - /* - * The GuC firmware image has the version number embedded at a - * well-known offset within the firmware blob; note that major / minor - * version are TWO bytes each (i.e. u16), although all pointers and - * offsets are defined in terms of bytes (u8). - */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - uc_fw->major_ver_found = css->guc.sw_version >> 16; - uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = css->huc.sw_version >> 16; - uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; - break; - - default: - DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); - err = -ENOEXEC; - goto fail; - } - - if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("Skipping %s firmware version check\n", - intel_uc_fw_type_repr(uc_fw->type)); - } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - err = -ENOEXEC; - goto fail; - } - - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto fail; - } - - uc_fw->obj = obj; - uc_fw->size = fw->size; - - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", - uc_fw->obj); - - release_firmware(fw); - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; - return; - -fail: - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", - uc_fw->path, err); - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, uc_fw->obj); - - release_firmware(fw); /* OK even if fw is NULL */ - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; + intel_guc_init_early(&dev_priv->guc); } void intel_uc_init_fw(struct drm_i915_private *dev_priv) { - fetch_uc_fw(dev_priv, &dev_priv->huc.fw); - fetch_uc_fw(dev_priv, &dev_priv->guc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) { - __intel_uc_fw_fini(&dev_priv->guc.fw); - __intel_uc_fw_fini(&dev_priv->huc.fw); -} - -static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) -{ - GEM_BUG_ON(!guc->send_regs.base); - GEM_BUG_ON(!guc->send_regs.count); - GEM_BUG_ON(i >= 
guc->send_regs.count); - - return _MMIO(guc->send_regs.base + 4 * i); + intel_uc_fw_fini(&dev_priv->guc.fw); + intel_uc_fw_fini(&dev_priv->huc.fw); } -static void guc_init_send_regs(struct intel_guc *guc) +/** + * intel_uc_init_mmio - setup uC MMIO access + * + * @dev_priv: device private + * + * Setup minimal state necessary for MMIO accesses later in the + * initialization sequence. + */ +void intel_uc_init_mmio(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - enum forcewake_domains fw_domains = 0; - unsigned int i; - - guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); - guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; - - for (i = 0; i < guc->send_regs.count; i++) { - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - guc_send_reg(guc, i), - FW_REG_READ | FW_REG_WRITE); - } - guc->send_regs.fw_domains = fw_domains; + intel_guc_init_send_regs(&dev_priv->guc); } static void guc_capture_load_err_log(struct intel_guc *guc) @@ -309,8 +132,6 @@ static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc_init_send_regs(guc); - if (HAS_GUC_CT(dev_priv)) return intel_guc_enable_ct(guc); @@ -328,27 +149,6 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; } -/** - * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode - * @guc: intel_guc structure - * @rsa_offset: rsa offset w.r.t ggtt base of huc vma - * - * Triggers a HuC firmware authentication request to the GuC via intel_guc_send - * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by - * intel_huc_auth(). - * - * Return: non-zero code on error - */ -int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) -{ - u32 action[] = { - INTEL_GUC_ACTION_AUTHENTICATE_HUC, - rsa_offset - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -395,7 +195,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_submission; intel_huc_init_hw(&dev_priv->huc); - ret = intel_guc_init_hw(&dev_priv->guc); + intel_guc_init_params(guc); + ret = intel_guc_fw_upload(guc); if (ret == 0 || ret != -EAGAIN) break; @@ -421,6 +222,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_interrupts; } + dev_info(dev_priv->drm.dev, "GuC %s (firmware %s [version %u.%u])\n", + i915_modparams.enable_guc_submission ? "submission enabled" : + "loaded", + guc->fw.path, + guc->fw.major_ver_found, guc->fw.minor_ver_found); + return 0; /* @@ -443,12 +250,14 @@ err_submission: err_guc: i915_ggtt_disable_guc(dev_priv); - DRM_ERROR("GuC init failed\n"); if (i915_modparams.enable_guc_loading > 1 || - i915_modparams.enable_guc_submission > 1) + i915_modparams.enable_guc_submission > 1) { + DRM_ERROR("GuC init failed. Firmware loading disabled.\n"); ret = -EIO; - else + } else { + DRM_NOTE("GuC init failed. 
Firmware loading disabled.\n"); ret = 0; + } if (i915_modparams.enable_guc_submission) { i915_modparams.enable_guc_submission = 0; @@ -456,7 +265,6 @@ err_guc: } i915_modparams.enable_guc_loading = 0; - DRM_NOTE("GuC firmware loading disabled\n"); return ret; } @@ -480,82 +288,3 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) i915_ggtt_disable_guc(dev_priv); } - -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) -{ - WARN(1, "Unexpected send: action=%#x\n", *action); - return -ENODEV; -} - -/* - * This function implements the MMIO based host to GuC interface. - */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 status; - int i; - int ret; - - GEM_BUG_ON(!len); - GEM_BUG_ON(len > guc->send_regs.count); - - /* If CT is available, we expect to use MMIO only during init/fini */ - GEM_BUG_ON(HAS_GUC_CT(dev_priv) && - *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); - - mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); - - for (i = 0; i < len; i++) - I915_WRITE(guc_send_reg(guc, i), action[i]); - - POSTING_READ(guc_send_reg(guc, i - 1)); - - intel_guc_notify(guc); - - /* - * No GuC command should ever take longer than 10ms. - * Fast commands should still complete in 10us. - */ - ret = __intel_wait_for_register_fw(dev_priv, - guc_send_reg(guc, 0), - INTEL_GUC_RECV_MASK, - INTEL_GUC_RECV_MASK, - 10, 10, &status); - if (status != INTEL_GUC_STATUS_SUCCESS) { - /* - * Either the GuC explicitly returned an error (which - * we convert to -EIO here) or no response at all was - * received within the timeout limit (-ETIMEDOUT) - */ - if (ret != -ETIMEDOUT) - ret = -EIO; - - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" - " ret=%d status=0x%08X response=0x%08X\n", - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); - } - - intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); - mutex_unlock(&guc->send_mutex); - - return ret; -} - -int intel_guc_sample_forcewake(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 action[2]; - - action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; - /* WaRsDisableCoarsePowerGating:skl,bxt */ - if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - action[1] = 0; - else - /* bit 0 and 1 are for Render and Media domain separately */ - action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 6966349ed737..e18d3bb02088 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -24,237 +24,15 @@ #ifndef _INTEL_UC_H_ #define _INTEL_UC_H_ -#include "intel_guc_fwif.h" -#include "i915_guc_reg.h" -#include "intel_ringbuffer.h" -#include "intel_guc_ct.h" -#include "i915_vma.h" +#include "intel_guc.h" +#include "intel_huc.h" -struct drm_i915_gem_request; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. 
- * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. - */ -struct i915_guc_client { - struct i915_vma *vma; - void *vaddr; - struct i915_gem_context *owner; - struct intel_guc *guc; - - uint32_t engines; /* bitmap of (host) engine ids */ - uint32_t priority; - u32 stage_id; - uint32_t proc_desc_offset; - - u16 doorbell_id; - unsigned long doorbell_offset; - - spinlock_t wq_lock; - /* Per-engine counts of GuC submissions */ - uint64_t submissions[I915_NUM_ENGINES]; -}; - -enum intel_uc_fw_status { - INTEL_UC_FIRMWARE_FAIL = -1, - INTEL_UC_FIRMWARE_NONE = 0, - INTEL_UC_FIRMWARE_PENDING, - INTEL_UC_FIRMWARE_SUCCESS -}; - -/* User-friendly representation of an enum */ -static inline -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) -{ - switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_NONE: - return "NONE"; - case INTEL_UC_FIRMWARE_PENDING: - return "PENDING"; - case INTEL_UC_FIRMWARE_SUCCESS: - return "SUCCESS"; - } - return "<invalid>"; -} - -enum intel_uc_fw_type { - INTEL_UC_FW_TYPE_GUC, - INTEL_UC_FW_TYPE_HUC -}; - -/* User-friendly representation of an enum */ -static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) -{ - switch (type) { - case INTEL_UC_FW_TYPE_GUC: - return "GuC"; - case INTEL_UC_FW_TYPE_HUC: - return "HuC"; - } - return "uC"; -} - -/* - * This structure encapsulates all the data needed during the process - * of fetching, caching, and loading the firmware image into the GuC. 
- */ -struct intel_uc_fw { - const char *path; - size_t size; - struct drm_i915_gem_object *obj; - enum intel_uc_fw_status fetch_status; - enum intel_uc_fw_status load_status; - - uint16_t major_ver_wanted; - uint16_t minor_ver_wanted; - uint16_t major_ver_found; - uint16_t minor_ver_found; - - enum intel_uc_fw_type type; - uint32_t header_size; - uint32_t header_offset; - uint32_t rsa_size; - uint32_t rsa_offset; - uint32_t ucode_size; - uint32_t ucode_offset; -}; - -struct intel_guc_log { - uint32_t flags; - struct i915_vma *vma; - /* The runtime stuff gets created only when GuC logging gets enabled */ - struct { - void *buf_addr; - struct workqueue_struct *flush_wq; - struct work_struct flush_work; - struct rchan *relay_chan; - } runtime; - /* logging related stats */ - u32 capture_miss_count; - u32 flush_interrupt_count; - u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 flush_count[GUC_MAX_LOG_BUFFER]; -}; - -struct intel_guc { - struct intel_uc_fw fw; - struct intel_guc_log log; - struct intel_guc_ct ct; - - /* Log snapshot if GuC errors during load */ - struct drm_i915_gem_object *load_err_log; - - /* intel_guc_recv interrupt related state */ - bool interrupts_enabled; - - struct i915_vma *ads_vma; - struct i915_vma *stage_desc_pool; - void *stage_desc_pool_vaddr; - struct ida stage_ids; - - struct i915_guc_client *execbuf_client; - - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - uint32_t db_cacheline; /* Cyclic counter mod pagesize */ - - /* GuC's FW specific registers used in MMIO send */ - struct { - u32 base; - unsigned int count; - enum forcewake_domains fw_domains; - } send_regs; - - /* To serialize the intel_guc_send actions */ - struct mutex send_mutex; - - /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len); - - /* GuC's FW specific notify function */ - void (*notify)(struct intel_guc *guc); -}; - -struct intel_huc { - /* Generic uC firmware management */ - struct intel_uc_fw fw; - - /* HuC-specific additions */ -}; - -/* intel_uc.c */ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); +void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); -int intel_guc_sample_forcewake(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); - -static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) -{ - return guc->send(guc, action, len); -} - -static inline void intel_guc_notify(struct intel_guc *guc) -{ - guc->notify(guc); -} - -/* intel_guc_loader.c */ -int intel_guc_select_fw(struct intel_guc *guc); -int intel_guc_init_hw(struct intel_guc *guc); -int intel_guc_suspend(struct drm_i915_private *dev_priv); -int intel_guc_resume(struct drm_i915_private *dev_priv); -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); - -/* i915_guc_submission.c */ -int i915_guc_submission_init(struct drm_i915_private *dev_priv); -int i915_guc_submission_enable(struct drm_i915_private *dev_priv); -void i915_guc_submission_disable(struct drm_i915_private *dev_priv); -void 
i915_guc_submission_fini(struct drm_i915_private *dev_priv); -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); - -/* intel_guc_log.c */ -int intel_guc_log_create(struct intel_guc *guc); -void intel_guc_log_destroy(struct intel_guc *guc); -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); -void i915_guc_log_register(struct drm_i915_private *dev_priv); -void i915_guc_log_unregister(struct drm_i915_private *dev_priv); - -static inline u32 guc_ggtt_offset(struct i915_vma *vma) -{ - u32 offset = i915_ggtt_offset(vma); - GEM_BUG_ON(offset < GUC_WOPCM_TOP); - GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); - return offset; -} - -/* intel_huc.c */ -void intel_huc_select_fw(struct intel_huc *huc); -void intel_huc_init_hw(struct intel_huc *huc); -void intel_huc_auth(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c new file mode 100644 index 000000000000..973888e94cba --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -0,0 +1,318 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drm_print.h> + +#include "intel_uc_fw.h" +#include "i915_drv.h" + +/** + * intel_uc_fw_fetch - fetch uC firmware + * + * @dev_priv: device private + * @uc_fw: uC firmware + * + * Fetch uC firmware into GEM obj. 
+ */ +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + struct uc_css_header *css; + size_t size; + int err; + + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + + if (!uc_fw->path) + return; + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + err = request_firmware(&fw, uc_fw->path, &pdev->dev); + if (err) { + DRM_DEBUG_DRIVER("%s fw request_firmware err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + DRM_DEBUG_DRIVER("%s fw size %zu ptr %p\n", + intel_uc_fw_type_repr(uc_fw->type), fw->size, fw); + + /* Check the size of the blob before examining buffer contents */ + if (fw->size < sizeof(struct uc_css_header)) { + DRM_WARN("%s: Unexpected firmware size (%zu, min %zu)\n", + intel_uc_fw_type_repr(uc_fw->type), + fw->size, sizeof(struct uc_css_header)); + err = -ENODATA; + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Firmware bits always start from header */ + uc_fw->header_offset = 0; + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - + css->key_size_dw - css->exponent_size_dw) * + sizeof(u32); + + if (uc_fw->header_size != sizeof(struct uc_css_header)) { + DRM_WARN("%s: Mismatched firmware header definition\n", + intel_uc_fw_type_repr(uc_fw->type)); + err = -ENOEXEC; + goto fail; + } + + /* then, uCode */ + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* Header and uCode will be loaded to WOPCM */ + size = uc_fw->header_size + uc_fw->ucode_size; + if (size > intel_guc_wopcm_size(dev_priv)) { + DRM_WARN("%s: Firmware is too large to fit in WOPCM\n", + intel_uc_fw_type_repr(uc_fw->type)); + err = -E2BIG; + goto fail; + } + + /* now RSA */ + if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { + DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n", + intel_uc_fw_type_repr(uc_fw->type), css->key_size_dw); + err = -ENOEXEC; + goto fail; + } + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. */ + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; + if (fw->size < size) { + DRM_WARN("%s: Truncated firmware (%zu, expected %zu)\n", + intel_uc_fw_type_repr(uc_fw->type), fw->size, size); + err = -ENOEXEC; + goto fail; + } + + /* + * The GuC firmware image has the version number embedded at a + * well-known offset within the firmware blob; note that major / minor + * version are TWO bytes each (i.e. u16), although all pointers and + * offsets are defined in terms of bytes (u8). 
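+ *
+ * For example, a CSS sw_version of 0x00090021 unpacks with the shifts
+ * and masks below to major 9, minor 0x21 (i.e. firmware version 9.33).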
+ */ + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + uc_fw->major_ver_found = css->guc.sw_version >> 16; + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = css->huc.sw_version >> 16; + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; + break; + + default: + MISSING_CASE(uc_fw->type); + break; + } + + DRM_DEBUG_DRIVER("%s fw version %u.%u (wanted %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { + DRM_NOTE("%s: Skipping firmware version check\n", + intel_uc_fw_type_repr(uc_fw->type)); + } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + DRM_NOTE("%s: Wrong firmware version (%u.%u, required %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + err = -ENOEXEC; + goto fail; + } + + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + DRM_DEBUG_DRIVER("%s fw object_create err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + uc_fw->obj = obj; + uc_fw->size = fw->size; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + release_firmware(fw); + return; + +fail: + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + DRM_WARN("%s: Failed to fetch firmware %s (error %d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + DRM_INFO("%s: Firmware can be downloaded from %s\n", + intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ +} + +/** + * intel_uc_fw_upload - load uC firmware using custom loader + * + * @uc_fw: uC firmware + * @loader: custom uC firmware loader function + * + * Loads uC firmware using custom loader and updates internal flags. + */ +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, + int (*xfer)(struct intel_uc_fw *uc_fw, + struct i915_vma *vma)) +{ + struct i915_vma *vma; + int err; + + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return -EIO; + + uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + /* Pin object with firmware */ + err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false); + if (err) { + DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + /* Call custom loader */ + err = xfer(uc_fw, vma); + + /* + * We keep the object pages for reuse during resume. But we can unpin it + * now that DMA has completed, so it doesn't continue to take up space. 
+ */ + i915_vma_unpin(vma); + + if (err) + goto fail; + + uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + DRM_INFO("%s: Loaded firmware %s (version %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + + return 0; + +fail: + uc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + DRM_WARN("%s: Failed to load firmware %s (error %d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + + return err; +} + +/** + * intel_uc_fw_fini - cleanup uC firmware + * + * @uc_fw: uC firmware + * + * Cleans up uC firmware by releasing the firmware GEM obj. + */ +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj; + + obj = fetch_and_zero(&uc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} + +/** + * intel_uc_fw_dump - dump information about uC firmware + * @uc_fw: uC firmware + * @p: the &drm_printer + * + * Pretty printer for uC firmware. + */ +void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p) +{ + drm_printf(p, "%s firmware: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: fetch %s, load %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status), + intel_uc_fw_status_repr(uc_fw->load_status)); + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + drm_printf(p, "\theader: offset %u, size %u\n", + uc_fw->header_offset, uc_fw->header_size); + drm_printf(p, "\tuCode: offset %u, size %u\n", + uc_fw->ucode_offset, uc_fw->ucode_size); + drm_printf(p, "\tRSA: offset %u, size %u\n", + uc_fw->rsa_offset, uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h new file mode 100644 index 000000000000..132903669391 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -0,0 +1,121 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +struct drm_printer; +struct drm_i915_private; +struct i915_vma; + +/* Home of GuC, HuC and DMC firmwares */ +#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" + +enum intel_uc_fw_status { + INTEL_UC_FIRMWARE_FAIL = -1, + INTEL_UC_FIRMWARE_NONE = 0, + INTEL_UC_FIRMWARE_PENDING, + INTEL_UC_FIRMWARE_SUCCESS +}; + +enum intel_uc_fw_type { + INTEL_UC_FW_TYPE_GUC, + INTEL_UC_FW_TYPE_HUC +}; + +/* + * This structure encapsulates all the data needed during the process + * of fetching, caching, and loading the firmware image into the uC. + */ +struct intel_uc_fw { + const char *path; + size_t size; + struct drm_i915_gem_object *obj; + enum intel_uc_fw_status fetch_status; + enum intel_uc_fw_status load_status; + + /* + * The firmware build process will generate a version header file with major and + * minor version defined. The versions are built into CSS header of firmware. + * i915 kernel driver set the minimal firmware version required per platform. + */ + u16 major_ver_wanted; + u16 minor_ver_wanted; + u16 major_ver_found; + u16 minor_ver_found; + + enum intel_uc_fw_type type; + u32 header_size; + u32 header_offset; + u32 rsa_size; + u32 rsa_offset; + u32 ucode_size; + u32 ucode_offset; +}; + +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_NONE: + return "NONE"; + case INTEL_UC_FIRMWARE_PENDING: + return "PENDING"; + case INTEL_UC_FIRMWARE_SUCCESS: + return "SUCCESS"; + } + return "<invalid>"; +} + +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case INTEL_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +static inline +void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) +{ + uc_fw->path = NULL; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->load_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->type = type; +} + +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw); +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, + int (*xfer)(struct intel_uc_fw *uc_fw, + struct i915_vma *vma)); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index b3c3f94fc7e4..20e3c65c0999 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -626,7 +626,23 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv) if (!dev_priv->uncore.funcs.force_wake_get) return; - WARN_ON(dev_priv->uncore.fw_domains_active); + WARN(dev_priv->uncore.fw_domains_active, + "Expected all fw_domains to be inactive, but %08x are still on\n", + dev_priv->uncore.fw_domains_active); +} + +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + if (!dev_priv->uncore.funcs.force_wake_get) + return; + + assert_rpm_wakelock_held(dev_priv); + + fw_domains &= dev_priv->uncore.fw_domains; + WARN(fw_domains & ~dev_priv->uncore.fw_domains_active, + "Expected %08x fw_domains to be active, but %08x are off\n", + fw_domains, fw_domains & ~dev_priv->uncore.fw_domains_active); } /* We give fast paths for the really cool registers */ @@ -1387,6 +1403,9 @@ static void i915_stop_engines(struct 
drm_i915_private *dev_priv, struct intel_engine_cs *engine; enum intel_engine_id id; + if (INTEL_GEN(dev_priv) < 3) + return; + for_each_engine_masked(engine, dev_priv, engine_mask, id) gen3_stop_engine(engine); } @@ -1726,16 +1745,12 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { - reset_func reset; + reset_func reset = intel_get_gpu_reset(dev_priv); int retry; int ret; might_sleep(); - reset = intel_get_gpu_reset(dev_priv); - if (reset == NULL) - return -ENODEV; - /* If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ @@ -1755,7 +1770,9 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) */ i915_stop_engines(dev_priv, engine_mask); - ret = reset(dev_priv, engine_mask); + ret = -ENODEV; + if (reset) + ret = reset(dev_priv, engine_mask); if (ret != -ETIMEDOUT) break; diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 03786f931905..582771251b57 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -25,6 +25,12 @@ #ifndef __INTEL_UNCORE_H__ #define __INTEL_UNCORE_H__ +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <linux/hrtimer.h> + +#include "i915_reg.h" + struct drm_i915_private; enum forcewake_domain_id { @@ -131,6 +137,8 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv); u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains); const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); enum forcewake_domains diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 404569c9fdfc..f225c288a121 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -306,6 +306,14 @@ struct bdb_general_features { #define LEGACY_CHILD_DEVICE_CONFIG_SIZE 33 +/* DDC Bus DDI Type 155+ */ +enum vbt_gmbus_ddi { + DDC_BUS_DDI_B = 0x1, + DDC_BUS_DDI_C, + DDC_BUS_DDI_D, + DDC_BUS_DDI_F, +}; + /* * The child device config, aka the display device data structure, provides a * description of a port and its configuration on the platform. 
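The assert_forcewakes_active() helper added above reduces to two mask operations: clamp the requested domains to those the hardware actually exposes, then flag any requested domain that is not currently powered. A small userspace model of that arithmetic, with made-up domain bit values and no claim to match the real forcewake register layout:

#include <stdint.h>
#include <stdio.h>

#define FW_RENDER	(1u << 0)
#define FW_BLITTER	(1u << 1)
#define FW_MEDIA	(1u << 2)

static uint32_t forcewakes_missing(uint32_t available, uint32_t active,
				   uint32_t requested)
{
	requested &= available;		/* ignore domains this device lacks */
	return requested & ~active;	/* requested but currently powered down */
}

int main(void)
{
	uint32_t available = FW_RENDER | FW_BLITTER | FW_MEDIA;
	uint32_t active    = FW_RENDER;
	uint32_t requested = FW_RENDER | FW_MEDIA;
	uint32_t missing   = forcewakes_missing(available, active, requested);

	if (missing)
		printf("Expected %08x fw_domains to be active, but %08x are off\n",
		       (unsigned)(requested & available), (unsigned)missing);
	return 0;
}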
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index c5c7e8efbdd3..a2632df39173 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -37,8 +37,7 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -huge_get_pages(struct drm_i915_gem_object *obj) +static int huge_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) const unsigned long nreal = obj->scratch / PAGE_SIZE; @@ -49,11 +48,11 @@ huge_get_pages(struct drm_i915_gem_object *obj) pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; if (sg_alloc_table(pages, npages, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = pages->sgl; @@ -81,11 +80,14 @@ huge_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, pages)) goto err; - return pages; + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); + + return 0; err: huge_free_pages(obj, pages); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; #undef GFP } diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c new file mode 100644 index 000000000000..5cc8101bb2b1 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -0,0 +1,1734 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "../i915_selftest.h" + +#include <linux/prime_numbers.h> + +#include "mock_drm.h" + +static const unsigned int page_sizes[] = { + I915_GTT_PAGE_SIZE_2M, + I915_GTT_PAGE_SIZE_64K, + I915_GTT_PAGE_SIZE_4K, +}; + +static unsigned int get_largest_page_size(struct drm_i915_private *i915, + u64 rem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) + return page_size; + } + + return 0; +} + +static void huge_pages_free_pages(struct sg_table *st) +{ + struct scatterlist *sg; + + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } + + sg_free_table(st); + kfree(st); +} + +static int get_huge_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + unsigned int page_mask = obj->mm.page_mask; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + + /* + * Our goal here is simple, we want to greedily fill the object from + * largest to smallest page-size, while ensuring that we use *every* + * page-size as per the given page-mask. + */ + do { + unsigned int bit = ilog2(page_mask); + unsigned int page_size = BIT(bit); + int order = get_order(page_size); + + do { + struct page *page; + + GEM_BUG_ON(order >= MAX_ORDER); + page = alloc_pages(GFP | __GFP_ZERO, order); + if (!page) + goto err; + + sg_set_page(sg, page, page_size, 0); + sg_page_sizes |= page_size; + st->nents++; + + rem -= page_size; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while ((rem - ((page_size-1) & page_mask)) >= page_size); + + page_mask &= (page_size-1); + } while (page_mask); + + if (i915_gem_gtt_prepare_pages(obj, st)) + goto err; + + obj->mm.madv = I915_MADV_DONTNEED; + + GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + huge_pages_free_pages(st); + + return -ENOMEM; +} + +static void put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_pages_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops huge_page_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = get_huge_pages, + .put_pages = put_huge_pages, +}; + +static struct drm_i915_gem_object * +huge_pages_object(struct drm_i915_private *i915, + u64 size, + unsigned int page_mask) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &huge_page_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + obj->mm.page_mask = page_mask; + + return obj; +} + 
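The comment in get_huge_pages() above describes a greedy fill: take the largest page size in the mask, emit chunks of it while the remainder can still hold one chunk of every smaller size still owed, then drop to the next size. A self-contained userspace model of that loop, assuming 4K/64K/2M page sizes and a size that is a multiple of the smallest size in the mask; emit_chunks() and the SZ_* macros are illustrative, and __builtin_clzll assumes GCC or Clang.

#include <stdint.h>
#include <stdio.h>

#define SZ_4K	(1ull << 12)
#define SZ_64K	(1ull << 16)
#define SZ_2M	(1ull << 21)

/* Emit chunk sizes for @size using every page size present in @page_mask */
static void emit_chunks(uint64_t size, uint64_t page_mask)
{
	uint64_t rem = size;

	do {
		uint64_t page_size = 1ull << (63 - __builtin_clzll(page_mask));
		/* smaller sizes still owed at least one chunk each */
		uint64_t still_owed = (page_size - 1) & page_mask;

		do {
			printf("chunk of %llu KiB\n",
			       (unsigned long long)(page_size >> 10));
			rem -= page_size;
			if (!rem)
				return;
		} while (rem - still_owed >= page_size);

		page_mask &= page_size - 1;	/* drop to the next smaller size */
	} while (page_mask);
}

int main(void)
{
	emit_chunks(SZ_2M + SZ_64K + SZ_4K, SZ_2M | SZ_64K | SZ_4K);
	return 0;
}

For a 2M + 64K + 4K object with all three sizes in the mask this prints one 2048 KiB chunk, one 64 KiB chunk and one 4 KiB chunk, so every size in the mask is exercised exactly as the comment above requires.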
+static int fake_get_huge_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const u64 max_len = rounddown_pow_of_two(UINT_MAX); + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + /* Use optimal page sized chunks to fill in the sg table */ + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + do { + unsigned int page_size = get_largest_page_size(i915, rem); + unsigned int len = min(page_size * div_u64(rem, page_size), + max_len); + + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = len; + sg_dma_len(sg) = len; + sg_dma_address(sg) = page_size; + + sg_page_sizes |= len; + + st->nents++; + + rem -= len; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = sg_next(sg); + } while (1); + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; +} + +static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int page_size; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, 1, GFP)) { + kfree(st); + return -ENOMEM; + } + + sg = st->sgl; + st->nents = 1; + + page_size = get_largest_page_size(i915, obj->base.size); + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = obj->base.size; + sg_dma_len(sg) = obj->base.size; + sg_dma_address(sg) = page_size; + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; +#undef GFP +} + +static void fake_free_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void fake_put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_huge_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages, + .put_pages = fake_put_huge_pages, +}; + +static const struct drm_i915_gem_object_ops fake_ops_single = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages_single, + .put_pages = fake_put_huge_pages, +}; + +static struct drm_i915_gem_object * +fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (size >> PAGE_SHIFT > UINT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + + if (single) + i915_gem_object_init(obj, &fake_ops_single); + else + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + return obj; +} + +static int igt_check_page_sizes(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj = vma->obj; + int err = 0; + + if 
(!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { + pr_err("unsupported page_sizes.sg=%u, supported=%u\n", + vma->page_sizes.sg & ~supported, supported); + err = -EINVAL; + } + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { + pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", + vma->page_sizes.gtt & ~supported, supported); + err = -EINVAL; + } + + if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { + pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", + vma->page_sizes.phys, obj->mm.page_sizes.phys); + err = -EINVAL; + } + + if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { + pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", + vma->page_sizes.sg, obj->mm.page_sizes.sg); + err = -EINVAL; + } + + if (obj->mm.page_sizes.gtt) { + pr_err("obj->page_sizes.gtt(%u) should never be set\n", + obj->mm.page_sizes.gtt); + err = -EINVAL; + } + + return err; +} + +static int igt_mock_exhaust_device_supported_pages(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int i, j, single; + int err; + + /* + * Sanity check creating objects with every valid page support + * combination for our mock device. + */ + + for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { + unsigned int combination = 0; + + for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { + if (i & BIT(j)) + combination |= page_sizes[j]; + } + + mkwrite_device_info(i915)->page_sizes = combination; + + for (single = 0; single <= 1; ++single) { + obj = fake_huge_pages_object(i915, combination, !!single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + if (obj->base.size != combination) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, combination); + err = -EINVAL; + goto out_put; + } + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.sg != combination) { + pr_err("page_sizes.sg=%u, expected=%u\n", + vma->page_sizes.sg, combination); + err = -EINVAL; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + + if (err) + goto out_device; + } + } + + goto out_device; + +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = saved_mask; + + return err; +} + +static int igt_mock_ppgtt_misaligned_dma(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + int bit; + int err; + + /* + * Sanity check dma misalignment for huge pages -- the dma addresses we + * insert into the paging structures need to always respect the page + * size alignment. 
+ */ + + bit = ilog2(I915_GTT_PAGE_SIZE_64K); + + for_each_set_bit_from(bit, &supported, + ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + IGT_TIMEOUT(end_time); + unsigned int page_size = BIT(bit); + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int offset; + unsigned int size = + round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; + struct i915_vma *vma; + + obj = fake_huge_pages_object(i915, size, true); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != size) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, size); + err = -EINVAL; + goto out_put; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + /* Force the page size for this object */ + obj->mm.page_sizes.sg = page_size; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != page_size) { + pr_err("page_sizes.gtt=%u, expected %u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + /* + * Try all the other valid offsets until the next + * boundary -- should always fall back to using 4K + * pages. + */ + for (offset = 4096; offset < page_size; offset += 4096) { + err = i915_vma_unbind(vma); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags | offset); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { + pr_err("page_sizes.gtt=%u, expected %lu\n", + vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + if (igt_timeout(end_time, + "%s timed out at offset %x with page-size %x\n", + __func__, offset, page_size)) + break; + } + + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static void close_object_list(struct list_head *objects, + struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } +} + +static int igt_mock_ppgtt_huge_fill(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT; + unsigned long page_num; + bool single = false; + LIST_HEAD(objects); + IGT_TIMEOUT(end_time); + int err = -ENODEV; + + for_each_prime_number_from(page_num, 1, max_pages) { + struct drm_i915_gem_object *obj; + u64 size = page_num << PAGE_SHIFT; + struct i915_vma *vma; + unsigned int expected_gtt = 0; + int i; + + obj = fake_huge_pages_object(i915, size, single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + if (obj->base.size != size) { + pr_err("obj->base.size=%zd, expected=%llu\n", + obj->base.size, size); + i915_gem_object_put(obj); + err = -EINVAL; + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } 
+ + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + break; + + err = igt_check_page_sizes(vma); + if (err) { + i915_vma_unpin(vma); + break; + } + + /* + * Figure out the expected gtt page size knowing that we go from + * largest to smallest page size sg chunks, and that we align to + * the largest page size. + */ + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && + size >= page_size) { + expected_gtt |= page_size; + size &= page_size-1; + } + } + + GEM_BUG_ON(!expected_gtt); + GEM_BUG_ON(size); + + if (expected_gtt & I915_GTT_PAGE_SIZE_4K) + expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; + + i915_vma_unpin(vma); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + break; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + break; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", + vma->page_sizes.gtt, expected_gtt, + obj->base.size, yesno(!!single)); + err = -EINVAL; + break; + } + + if (igt_timeout(end_time, + "%s timed out at size %zd\n", + __func__, obj->base.size)) + break; + + single = !single; + } + + close_object_list(&objects, ppgtt); + + if (err == -ENOMEM || err == -ENOSPC) + err = 0; + + return err; +} + +static int igt_mock_ppgtt_64K(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + struct drm_i915_gem_object *obj; + const struct object_info { + unsigned int size; + unsigned int gtt; + unsigned int offset; + } objects[] = { + /* Cases with forced padding/alignment */ + { + .size = SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_64K + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_64K - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + /* Try without any forced padding/alignment */ + { + .size = SZ_64K, + .offset = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + { + .size = SZ_128K, + .offset = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + }; + struct i915_vma *vma; + int i, single; + int err; + + /* + * Sanity check some of the trickiness with 64K pages -- either we can + * safely mark the whole page-table(2M block) as 64K, or we have to + * always fallback to 4K. 
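The expected_gtt computation in igt_mock_ppgtt_huge_fill() above, and the 64K cases tabulated in igt_mock_ppgtt_64K(), both follow the same rule: the size is consumed from the largest supported page size downwards, and a 64K GTT mapping is only expected when no 4K pages are needed alongside it, since 64K and 4K entries cannot share a 2M page-table block. A rough userspace restatement of that rule, with illustrative SZ_* constants and no claim to mirror the driver's internals:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K	(1u << 12)
#define SZ_64K	(1u << 16)
#define SZ_2M	(1u << 21)

static const unsigned int page_sizes[] = { SZ_2M, SZ_64K, SZ_4K };

static unsigned int expected_gtt(uint64_t size, unsigned int supported)
{
	unsigned int gtt = 0;
	size_t i;

	/* consume the size from the largest supported page size downwards */
	for (i = 0; i < sizeof(page_sizes) / sizeof(page_sizes[0]); i++) {
		unsigned int ps = page_sizes[i];

		if ((supported & ps) && size >= ps) {
			gtt |= ps;
			size &= ps - 1;
		}
	}

	/* any 4K remainder forces the 64K portion back to 4K mappings */
	if (gtt & SZ_4K)
		gtt &= ~SZ_64K;

	return gtt;
}

int main(void)
{
	unsigned int all = SZ_2M | SZ_64K | SZ_4K;

	printf("2M+64K -> %#x\n", expected_gtt((uint64_t)SZ_2M + SZ_64K, all));
	printf("2M+4K  -> %#x\n", expected_gtt((uint64_t)SZ_2M + SZ_4K, all));
	printf("64K-4K -> %#x\n", expected_gtt(SZ_64K - SZ_4K, all));
	return 0;
}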
+ */ + + if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) + return 0; + + for (i = 0; i < ARRAY_SIZE(objects); ++i) { + unsigned int size = objects[i].size; + unsigned int expected_gtt = objects[i].gtt; + unsigned int offset = objects[i].offset; + unsigned int flags = PIN_USER; + + for (single = 0; single <= 1; single++) { + obj = fake_huge_pages_object(i915, size, !!single); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_object_put; + + /* + * Disable 2M pages -- We only want to use 64K/4K pages + * for this test. + */ + obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object_unpin; + } + + if (offset) + flags |= PIN_OFFSET_FIXED | offset; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_vma_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + goto out_vma_unpin; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + goto out_vma_unpin; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", + vma->page_sizes.gtt, expected_gtt, i, + yesno(!!single)); + err = -EINVAL; + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + return 0; + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); +out_object_unpin: + i915_gem_object_unpin_pages(obj); +out_object_put: + i915_gem_object_put(obj); + + return err; +} + +static struct i915_vma * +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + const int gen = INTEL_GEN(vma->vm->i915); + unsigned int count = vma->size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj; + struct i915_vma *batch; + unsigned int size; + u32 *cmd; + int n; + int err; + + size = (1 + 4 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = val; + } + + offset += PAGE_SIZE; + } + + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (err) + goto err; + + return batch; + +err: + i915_gem_object_put(obj); + + return ERR_PTR(err); +} + +static int gpu_write(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 dword, + u32 value) +{ + struct drm_i915_gem_request *rq; + struct i915_vma *batch; + int flags = 0; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + batch = gpu_write_dw(vma, dword * sizeof(u32), value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_request; + } + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + +err_request: + __i915_add_request(rq, err == 0); + + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned int needs_flush; + unsigned long n; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(ptr, PAGE_SIZE); + + if (ptr[dword] != val) { + pr_err("n=%lu ptr[%u]=%u, val=%u\n", + n, dword, ptr[dword], val); + kunmap_atomic(ptr); + err = -EINVAL; + break; + } + + kunmap_atomic(ptr); + } + + i915_gem_obj_finish_shmem_access(obj); + + return err; +} + +static int igt_write_huge(struct i915_gem_context *ctx, + struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; + struct intel_engine_cs *engine; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int max_page_size; + unsigned int id; + u64 max; + u64 num; + u64 size; + int err = 0; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + size = obj->base.size; + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); + + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64((vm->total - size), max_page_size); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) { + pr_info("store-dword-imm not supported on engine=%u\n", + id); + continue; + } + + /* + * Try various offsets until we timeout -- we want to avoid + * issues hidden by effectively always using offset = 0. + */ + for_each_prime_number_from(num, 0, max) { + u64 offset = num * max_page_size; + u32 dword; + + err = i915_vma_unbind(vma); + if (err) + goto out_vma_close; + + err = i915_vma_pin(vma, size, max_page_size, flags | offset); + if (err) { + /* + * The ggtt may have some pages reserved so + * refrain from erroring out. + */ + if (err == -ENOSPC && i915_is_ggtt(vm)) { + err = 0; + continue; + } + + goto out_vma_close; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + dword = offset_in_page(num) / 4; + + err = gpu_write(vma, ctx, engine, dword, num + 1); + if (err) { + pr_err("gpu-write failed at offset=%llx", offset); + goto out_vma_unpin; + } + + err = cpu_check(obj, dword, num + 1); + if (err) { + pr_err("cpu-check failed at offset=%llx", offset); + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + + if (num > 0 && + igt_timeout(end_time, + "%s timed out on engine=%u at offset=%llx, max_page_size=%x\n", + __func__, id, offset, max_page_size)) + break; + } + } + +out_vma_unpin: + if (i915_vma_is_pinned(vma)) + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); + + return err; +} + +static int igt_ppgtt_exhaust_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + static unsigned int pages[ARRAY_SIZE(page_sizes)]; + struct drm_i915_gem_object *obj; + unsigned int size_mask; + unsigned int page_mask; + int n, i; + int err = -ENODEV; + + /* + * Sanity check creating objects with a varying mix of page sizes -- + * ensuring that our writes lands in the right place. + */ + + n = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) + pages[n++] = BIT(i); + + for (size_mask = 2; size_mask < BIT(n); size_mask++) { + unsigned int size = 0; + + for (i = 0; i < n; i++) { + if (size_mask & BIT(i)) + size |= pages[i]; + } + + /* + * For our page mask we want to enumerate all the page-size + * combinations which will fit into our chosen object size. + */ + for (page_mask = 2; page_mask <= size_mask; page_mask++) { + unsigned int page_sizes = 0; + + for (i = 0; i < n; i++) { + if (page_mask & BIT(i)) + page_sizes |= pages[i]; + } + + /* + * Ensure that we can actually fill the given object + * with our chosen page mask. 
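igt_ppgtt_exhaust_huge() above enumerates object sizes built from sums of distinct supported page sizes and pairs each with every page-size mix, keeping only mixes whose smallest page size divides the size (the alignment filter applied just below). A compact userspace sketch of that enumeration, assuming 4K/64K/2M as the supported sizes; the pages[] table and NPAGES are illustrative only.

#include <stdint.h>
#include <stdio.h>

static const unsigned int pages[] = { 1u << 12, 1u << 16, 1u << 21 };
#define NPAGES 3

int main(void)
{
	unsigned int size_mask, page_mask, i;

	for (size_mask = 2; size_mask < (1u << NPAGES); size_mask++) {
		uint64_t size = 0;

		for (i = 0; i < NPAGES; i++)
			if (size_mask & (1u << i))
				size |= pages[i];

		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
			unsigned int page_sizes = 0, smallest;

			for (i = 0; i < NPAGES; i++)
				if (page_mask & (1u << i))
					page_sizes |= pages[i];

			/* the smallest size in the mix must divide the size */
			smallest = page_sizes & -page_sizes;
			if (size & (smallest - 1))
				continue;

			printf("size=%llu KiB page_sizes=%#x\n",
			       (unsigned long long)(size >> 10), page_sizes);
		}
	}
	return 0;
}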
+ */ + if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) + continue; + + obj = huge_pages_object(i915, size, page_sizes); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + + if (err == -ENOMEM) { + pr_info("unable to get pages, size=%u, pages=%u\n", + size, page_sizes); + err = 0; + break; + } + + pr_err("pin_pages failed, size=%u, pages=%u\n", + size_mask, page_mask); + + goto out_device; + } + + /* Force the page-size for the gtt insertion */ + obj->mm.page_sizes.sg = page_sizes; + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("exhaust write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + goto out_device; + +out_unpin: + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = supported; + + return err; +} + +static int igt_ppgtt_internal_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_64K, + SZ_128K, + SZ_256K, + SZ_512K, + SZ_1M, + SZ_2M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through internal + * -- ensure that our writes land in the right place. + */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("internal unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("internal write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static int igt_ppgtt_gemfs_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_2M, + SZ_4M, + SZ_8M, + SZ_16M, + SZ_32M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through gemfs -- + * ensure that our writes land in the right place. 
+ */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("gemfs write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_ppgtt_pin_update(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *dev_priv = ctx->i915; + unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + int first, last; + int err; + + /* + * Make sure there's no funny business when doing a PIN_UPDATE -- in the + * past we had a subtle issue with being able to incorrectly do multiple + * alloc va ranges on the same object when doing a PIN_UPDATE, which + * resulted in some pretty nasty bugs, though only when using + * huge-gtt-pages. + */ + + if (!USES_FULL_48BIT_PPGTT(dev_priv)) { + pr_info("48b PPGTT not supported, skipping\n"); + return 0; + } + + first = ilog2(I915_GTT_PAGE_SIZE_64K); + last = ilog2(I915_GTT_PAGE_SIZE_2M); + + for_each_set_bit_from(first, &supported, last + 1) { + unsigned int page_size = BIT(first); + + obj = i915_gem_object_create_internal(dev_priv, page_size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, SZ_2M, 0, flags); + if (err) + goto out_close; + + if (vma->page_sizes.sg < page_size) { + pr_info("Unable to allocate page-size %x, finishing test early\n", + page_size); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + if (vma->page_sizes.gtt != page_size) { + dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); + + /* + * The only valid reason for this to ever fail would be + * if the dma-mapper screwed us over when we did the + * dma_map_sg(), since it has the final say over the dma + * address. 
+ */ + if (IS_ALIGNED(addr, page_size)) { + pr_err("page_sizes.gtt=%u, expected=%u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } else { + pr_info("dma address misaligned, finishing test early\n"); + } + + goto out_unpin; + } + + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + } + + obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + /* + * Make sure we don't end up with something like where the pde is still + * pointing to the 2M page, and the pt we just filled-in is dangling -- + * we can check this by writing to the first page where it would then + * land in the now stale 2M page. + */ + + err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_tmpfs_fallback(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct vfsmount *gemfs = i915->mm.gemfs; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *vaddr; + int err = 0; + + /* + * Make sure that we don't burst into a ball of flames upon falling back + * to tmpfs, which we rely on if on the off-chance we encouter a failure + * when setting up gemfs. + */ + + i915->mm.gemfs = NULL; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_restore; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + *vaddr = 0xdeadbeaf; + + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_restore: + i915->mm.gemfs = gemfs; + + return err; +} + +static int igt_shrink_thp(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER; + int err; + + /* + * Sanity check shrinking huge-paged object -- make sure nothing blows + * up. 
+ */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + obj = i915_gem_object_create(i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("failed to allocate THP, finishing test early\n"); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + + /* + * Now that the pages are *unpinned* shrink-all should invoke + * shmem to truncate our pages. + */ + i915_gem_shrink_all(i915); + if (!IS_ERR_OR_NULL(obj->mm.pages)) { + pr_err("shrink-all didn't truncate the pages\n"); + err = -EINVAL; + goto out_close; + } + + if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { + pr_err("residual page-size bits left\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_huge_page_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_ppgtt_misaligned_dma), + SUBTEST(igt_mock_ppgtt_huge_fill), + SUBTEST(igt_mock_ppgtt_64K), + }; + int saved_ppgtt = i915_modparams.enable_ppgtt; + struct drm_i915_private *dev_priv; + struct pci_dev *pdev; + struct i915_hw_ppgtt *ppgtt; + int err; + + dev_priv = mock_gem_device(); + if (!dev_priv) + return -ENOMEM; + + /* Pretend to be a device which supports the 48b PPGTT */ + i915_modparams.enable_ppgtt = 3; + + pdev = dev_priv->drm.pdev; + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + + if (!i915_vm_is_48bit(&ppgtt->base)) { + pr_err("failed to create 48b PPGTT\n"); + err = -EINVAL; + goto out_close; + } + + /* If we were ever hit this then it's time to mock the 64K scratch */ + if (!i915_vm_has_scratch_64K(&ppgtt->base)) { + pr_err("PPGTT missing 64K scratch page\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_subtests(tests, ppgtt); + +out_close: + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + i915_modparams.enable_ppgtt = saved_ppgtt; + + drm_dev_unref(&dev_priv->drm); + + return err; +} + +int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_shrink_thp), + SUBTEST(igt_ppgtt_pin_update), + SUBTEST(igt_tmpfs_fallback), + SUBTEST(igt_ppgtt_exhaust_huge), + SUBTEST(igt_ppgtt_gemfs_huge), + SUBTEST(igt_ppgtt_internal_huge), + }; + struct drm_file *file; + struct i915_gem_context *ctx; + int err; + + if (!USES_PPGTT(dev_priv)) { + pr_info("PPGTT not supported, skipping live-selftests\n"); + return 0; + } + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + + ctx = live_context(dev_priv, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + err = 
i915_subtests(tests, ctx); + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index fb0a58fc8348..def5052862ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -417,7 +417,7 @@ static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) if (err) return err; - list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { struct i915_vma *vma; vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 5ea373221f49..f463105ff48d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -24,6 +24,9 @@ #include "../i915_selftest.h" +#include "lib_sw_fence.h" +#include "mock_context.h" +#include "mock_drm.h" #include "mock_gem_device.h" static int populate_ggtt(struct drm_i915_private *i915) @@ -47,7 +50,7 @@ static int populate_ggtt(struct drm_i915_private *i915) if (!list_empty(&i915->mm.unbound_list)) { size = 0; - list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) size++; pr_err("Found %lld objects unbound!\n", size); @@ -74,10 +77,10 @@ static void cleanup_objects(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj, *on; - list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) i915_gem_object_put(obj); - list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) i915_gem_object_put(obj); mutex_unlock(&i915->drm.struct_mutex); @@ -149,8 +152,6 @@ static int igt_overcommit(void *arg) goto cleanup; } - list_move(&obj->global_link, &i915->mm.unbound_list); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); @@ -325,6 +326,148 @@ cleanup: return err; } +static int igt_evict_contexts(void *arg) +{ + const u64 PRETEND_GGTT_SIZE = 16ull << 20; + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct reserved { + struct drm_mm_node node; + struct reserved *next; + } *reserved = NULL; + struct drm_mm_node hole; + unsigned long count; + int err; + + /* + * The purpose of this test is to verify that we will trigger an + * eviction in the GGTT when constructing a request that requires + * additional space in the GGTT for pinning the context. This space + * is not directly tied to the request so reclaiming it requires + * extra work. + * + * As such this test is only meaningful for full-ppgtt environments + * where the GTT space of the request is separate from the GGTT + * allocation required to build the request. 
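The loop just below in igt_evict_contexts() fills the GGTT with unevictable 1 MiB nodes, tracking each reservation on a singly linked list so everything can be released on the way out. A userspace model of that bookkeeping, where a plain counter stands in for the GGTT running out of space and fake_gtt_insert() is an invented stand-in for i915_gem_gtt_insert():

#include <stdio.h>
#include <stdlib.h>

struct reserved {
	unsigned long offset;	/* stand-in for the drm_mm node */
	struct reserved *next;
};

static int fake_gtt_insert(unsigned long *offset, unsigned long *remaining)
{
	if (*remaining == 0)
		return -1;		/* address space exhausted */
	*offset = --(*remaining) << 20;	/* hand out 1 MiB slots */
	return 0;
}

int main(void)
{
	struct reserved *reserved = NULL;
	unsigned long remaining = 16;	/* pretend 16 MiB of GGTT */
	unsigned long count = 0;

	for (;;) {
		struct reserved *r = calloc(1, sizeof(*r));

		if (!r)
			break;
		if (fake_gtt_insert(&r->offset, &remaining)) {
			free(r);
			break;
		}
		r->next = reserved;	/* push onto the reservation list */
		reserved = r;
		count++;
	}
	printf("Filled GGTT with %lu 1MiB nodes\n", count);

	while (reserved) {		/* release everything on the way out */
		struct reserved *next = reserved->next;

		free(reserved);
		reserved = next;
	}
	return 0;
}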
+ */ + if (!USES_FULL_PPGTT(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + /* Reserve a block so that we know we have enough to fit a few rq */ + memset(&hole, 0, sizeof(hole)); + err = i915_gem_gtt_insert(&i915->ggtt.base, &hole, + PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, i915->ggtt.base.total, + PIN_NOEVICT); + if (err) + goto out_locked; + + /* Make the GGTT appear small by filling it with unevictable nodes */ + count = 0; + do { + struct reserved *r; + + r = kcalloc(1, sizeof(*r), GFP_KERNEL); + if (!r) { + err = -ENOMEM; + goto out_locked; + } + + if (i915_gem_gtt_insert(&i915->ggtt.base, &r->node, + 1ul << 20, 0, I915_COLOR_UNEVICTABLE, + 0, i915->ggtt.base.total, + PIN_NOEVICT)) { + kfree(r); + break; + } + + r->next = reserved; + reserved = r; + + count++; + } while (1); + drm_mm_remove_node(&hole); + mutex_unlock(&i915->drm.struct_mutex); + pr_info("Filled GGTT with %lu 1MiB nodes\n", count); + + /* Overfill the GGTT with context objects and so try to evict one. */ + for_each_engine(engine, i915, id) { + struct i915_sw_fence fence; + struct drm_file *file; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + count = 0; + mutex_lock(&i915->drm.struct_mutex); + onstack_fence_init(&fence); + do { + struct drm_i915_gem_request *rq; + struct i915_gem_context *ctx; + + ctx = live_context(i915, file); + if (!ctx) + break; + + /* We will need some GGTT space for the rq's context */ + igt_evict_ctl.fail_if_busy = true; + rq = i915_gem_request_alloc(engine, ctx); + igt_evict_ctl.fail_if_busy = false; + + if (IS_ERR(rq)) { + /* When full, fail_if_busy will trigger EBUSY */ + if (PTR_ERR(rq) != -EBUSY) { + pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n", + ctx->hw_id, engine->name, + (int)PTR_ERR(rq)); + err = PTR_ERR(rq); + } + break; + } + + /* Keep every request/ctx pinned until we are full */ + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + &fence, + GFP_KERNEL); + if (err < 0) + break; + + i915_add_request(rq); + count++; + err = 0; + } while(1); + mutex_unlock(&i915->drm.struct_mutex); + + onstack_fence_fini(&fence); + pr_info("Submitted %lu contexts/requests on %s\n", + count, engine->name); + + mock_file_free(i915, file); + if (err) + break; + } + + mutex_lock(&i915->drm.struct_mutex); +out_locked: + while (reserved) { + struct reserved *next = reserved->next; + + drm_mm_remove_node(&reserved->node); + kfree(reserved); + + reserved = next; + } + if (drm_mm_node_allocated(&hole)) + drm_mm_remove_node(&hole); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + int i915_gem_evict_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -348,3 +491,12 @@ int i915_gem_evict_mock_selftests(void) drm_dev_unref(&i915->drm); return err; } + +int i915_gem_evict_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_contexts), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6b132caffa18..9da0c9f99916 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -39,25 +39,26 @@ static void fake_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -fake_get_pages(struct drm_i915_gem_object *obj) +static int fake_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) #define PFN_BIAS 0x1000 
struct sg_table *pages; struct scatterlist *sg; + unsigned int sg_page_sizes; typeof(obj->base.size) rem; pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rem = round_up(obj->base.size, BIT(31)) >> 31; if (sg_alloc_table(pages, rem, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } + sg_page_sizes = 0; rem = obj->base.size; for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); @@ -66,13 +67,17 @@ fake_get_pages(struct drm_i915_gem_object *obj) sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; + sg_page_sizes |= len; rem -= len; } GEM_BUG_ON(rem); obj->mm.madv = I915_MADV_DONTNEED; - return pages; + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; #undef GFP } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 8f011c447e41..1b8774a42e48 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -251,14 +251,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return PTR_ERR(io); } - err = i915_vma_get_fence(vma); - if (err) { - pr_err("Failed to get fence for partial view: offset=%lu\n", - page); - i915_vma_unpin_iomap(vma); - return err; - } - iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); i915_vma_unpin_iomap(vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6664cb2eb0b8..a999161e8db1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -215,7 +215,9 @@ static int igt_request_rewind(void *arg) } i915_gem_request_get(vip); i915_add_request(vip); + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); mutex_unlock(&i915->drm.struct_mutex); @@ -418,7 +420,10 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) err = PTR_ERR(cmd); goto err; } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -605,8 +610,8 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) *cmd++ = lower_32_bits(vma->node.start); } *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + i915_gem_chipset_flush(i915); - wmb(); i915_gem_object_unpin_map(obj); return vma; @@ -625,7 +630,7 @@ static int recursive_batch_resolve(struct i915_vma *batch) return PTR_ERR(cmd); *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(batch->vm->i915); i915_gem_object_unpin_map(batch->obj); @@ -858,7 +863,8 @@ out_request: I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(request[id]->batch->obj); } diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 18b174d855ca..d7dd98a6acad 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as subtest__name to create @@ -15,5 +16,7 @@ selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, 
i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) +selftest(evict, i915_gem_evict_live_selftests) +selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index fc74687501ba..19c6fce837df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as subtest__name to create @@ -21,3 +22,4 @@ selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) +selftest(hugepages, i915_gem_huge_page_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index 19d145d6bf52..ea01d0fe3ace 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -24,6 +24,7 @@ #include <linux/completion.h> #include <linux/delay.h> +#include <linux/prime_numbers.h> #include "../i915_selftest.h" @@ -565,6 +566,46 @@ err_in: return ret; } +static int test_timer(void *arg) +{ + unsigned long target, delay; + struct timed_fence tf; + + timed_fence_init(&tf, target = jiffies); + if (!i915_sw_fence_done(&tf.fence)) { + pr_err("Fence with immediate expiration not signaled\n"); + goto err; + } + timed_fence_fini(&tf); + + for_each_prime_number(delay, i915_selftest.timeout_jiffies/2) { + timed_fence_init(&tf, target = jiffies + delay); + if (i915_sw_fence_done(&tf.fence)) { + pr_err("Fence with future expiration (%lu jiffies) already signaled\n", delay); + goto err; + } + + i915_sw_fence_wait(&tf.fence); + if (!i915_sw_fence_done(&tf.fence)) { + pr_err("Fence not signaled after wait\n"); + goto err; + } + if (time_before(jiffies, target)) { + pr_err("Fence signaled too early, target=%lu, now=%lu\n", + target, jiffies); + goto err; + } + + timed_fence_fini(&tf); + } + + return 0; + +err: + timed_fence_fini(&tf); + return -EINVAL; +} + int i915_sw_fence_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -576,6 +617,7 @@ int i915_sw_fence_mock_selftests(void) SUBTEST(test_C_AB), SUBTEST(test_chain), SUBTEST(test_ipc), + SUBTEST(test_timer), }; return i915_subtests(tests, NULL); diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 828904b7d468..54fc571b1102 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -271,13 +271,7 @@ struct igt_wakeup { u32 seqno; }; -static int wait_atomic(atomic_t *p) -{ - schedule(); - return 0; -} - -static int wait_atomic_timeout(atomic_t *p) +static int wait_atomic_timeout(atomic_t *p, unsigned int mode) { return schedule_timeout(10 * HZ) ? 
0 : -ETIMEDOUT; } @@ -348,7 +342,7 @@ static void igt_wake_all_sync(atomic_t *ready, atomic_set(ready, 0); wake_up_all(wq); - wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + wait_on_atomic_t(set, atomic_t_wait, TASK_UNINTERRUPTIBLE); atomic_set(ready, count); atomic_set(done, count); } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 377c1de766ce..71ce06680d66 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -165,6 +165,7 @@ static int emit_recurse_batch(struct hang *h, *batch++ = lower_32_bits(vma->node.start); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + i915_gem_chipset_flush(h->i915); flags = 0; if (INTEL_GEN(vm->i915) <= 5) @@ -231,7 +232,7 @@ static u32 hws_seqno(const struct hang *h, static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(h->i915); i915_gem_object_unpin_map(h->obj); i915_gem_object_put(h->obj); @@ -275,6 +276,8 @@ static int igt_hang_sanitycheck(void *arg) i915_gem_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + __i915_add_request(rq, true); timeout = i915_wait_request(rq, @@ -621,8 +624,11 @@ static int igt_wait_reset(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -713,8 +719,12 @@ static int igt_reset_queue(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, prev)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", prev->fence.seqno, hws_seqno(&h, prev)); + intel_engine_dump(rq->engine, &p); + i915_gem_request_put(rq); i915_gem_request_put(prev); @@ -765,7 +775,7 @@ static int igt_reset_queue(void *arg) pr_info("%s: Completed %d resets\n", engine->name, count); *h.batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); i915_gem_request_put(prev); } @@ -815,8 +825,11 @@ static int igt_handle_error(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -865,9 +878,16 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), }; + int err; if (!intel_has_gpu_reset(i915)) return 0; - return i915_subtests(tests, i915); + intel_runtime_pm_get(i915); + + err = i915_subtests(tests, i915); + + intel_runtime_pm_put(i915); + + return err; } diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c new file mode 100644 index 000000000000..3790fdf44a1a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to 
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "lib_sw_fence.h" + +/* Small library of different fence types useful for writing tests */ + +static int __i915_sw_fence_call +nop_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + +void __onstack_fence_init(struct i915_sw_fence *fence, + const char *name, + struct lock_class_key *key) +{ + debug_fence_init_onstack(fence); + + __init_waitqueue_head(&fence->wait, name, key); + atomic_set(&fence->pending, 1); + fence->flags = (unsigned long)nop_fence_notify; +} + +void onstack_fence_fini(struct i915_sw_fence *fence) +{ + i915_sw_fence_commit(fence); + i915_sw_fence_fini(fence); +} + +static void timed_fence_wake(unsigned long data) +{ + struct timed_fence *tf = (struct timed_fence *)data; + + i915_sw_fence_commit(&tf->fence); +} + +void timed_fence_init(struct timed_fence *tf, unsigned long expires) +{ + onstack_fence_init(&tf->fence); + + setup_timer_on_stack(&tf->timer, timed_fence_wake, (unsigned long)tf); + + if (time_after(expires, jiffies)) + mod_timer(&tf->timer, expires); + else + i915_sw_fence_commit(&tf->fence); +} + +void timed_fence_fini(struct timed_fence *tf) +{ + if (del_timer_sync(&tf->timer)) + i915_sw_fence_commit(&tf->fence); + + destroy_timer_on_stack(&tf->timer); + i915_sw_fence_fini(&tf->fence); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h new file mode 100644 index 000000000000..474aafb92ae1 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -0,0 +1,42 @@ +/* + * lib_sw_fence.h - library routines for testing N:M synchronisation points + * + * Copyright (C) 2017 Intel Corporation + * + * This file is released under the GPLv2. 
+ * + */ + +#ifndef _LIB_SW_FENCE_H_ +#define _LIB_SW_FENCE_H_ + +#include <linux/timer.h> + +#include "../i915_sw_fence.h" + +#ifdef CONFIG_LOCKDEP +#define onstack_fence_init(fence) \ +do { \ + static struct lock_class_key __key; \ + \ + __onstack_fence_init((fence), #fence, &__key); \ +} while (0) +#else +#define onstack_fence_init(fence) \ + __onstack_fence_init((fence), NULL, NULL) +#endif + +void __onstack_fence_init(struct i915_sw_fence *fence, + const char *name, + struct lock_class_key *key); +void onstack_fence_fini(struct i915_sw_fence *fence); + +struct timed_fence { + struct i915_sw_fence fence; + struct timer_list timer; +}; + +void timed_fence_init(struct timed_fence *tf, unsigned long expires); +void timed_fence_fini(struct timed_fence *tf); + +#endif /* _LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 098ce643ad07..bbf80d42e793 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -73,11 +73,7 @@ err_put: void mock_context_close(struct i915_gem_context *ctx) { - i915_gem_context_set_closed(ctx); - - i915_ppgtt_close(&ctx->ppgtt->base); - - i915_gem_context_put(ctx); + context_close(ctx); } void mock_init_contexts(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index fc0fd7498689..331c2b09869e 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -32,9 +32,9 @@ static struct mock_request *first_request(struct mock_engine *engine) link); } -static void hw_delay_complete(unsigned long data) +static void hw_delay_complete(struct timer_list *t) { - struct mock_engine *engine = (typeof(engine))data; + struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_request *request; spin_lock(&engine->hw_lock); @@ -161,9 +161,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* fake hw queue */ spin_lock_init(&engine->hw_lock); - setup_timer(&engine->hw_delay, - hw_delay_complete, - (unsigned long)engine); + timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); return &engine->base; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 2388424a14da..04eb9362f4f8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -83,6 +83,8 @@ static void mock_device_release(struct drm_device *dev) kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); + i915_gemfs_fini(i915); + drm_dev_fini(&i915->drm); put_device(&i915->drm.pdev->dev); } @@ -146,7 +148,7 @@ struct drm_i915_private *mock_gem_device(void) dev_set_name(&pdev->dev, "mock"); dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); -#if IS_ENABLED(CONFIG_IOMMU_API) +#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) /* hack to disable iommu for the fake device; force identity mapping */ pdev->dev.archdata.iommu = (void *)-1; #endif @@ -172,6 +174,11 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; + mkwrite_device_info(i915)->page_sizes = + I915_GTT_PAGE_SIZE_4K | + I915_GTT_PAGE_SIZE_64K | + I915_GTT_PAGE_SIZE_2M; + spin_lock_init(&i915->mm.object_stat_lock); mock_uncore_init(i915); @@ -239,8 +246,16 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->kernel_context) goto 
err_engine; + i915->preempt_context = mock_context(i915, NULL); + if (!i915->preempt_context) + goto err_kernel_context; + + WARN_ON(i915_gemfs_init(i915)); + return i915; +err_kernel_context: + i915_gem_context_put(i915->kernel_context); err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h index 4cca4d57f52c..b5dc4e394555 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.h +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MOCK_GEM_DEVICE_H__ #define __MOCK_GEM_DEVICE_H__ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h index 9fbf67321662..20acdbee7bd0 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_object.h +++ b/drivers/gpu/drm/i915/selftests/mock_gem_object.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MOCK_GEM_OBJECT_H__ #define __MOCK_GEM_OBJECT_H__ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index f2118cf535a0..336e1afb250f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -43,7 +43,6 @@ static int mock_bind_ppgtt(struct i915_vma *vma, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - vma->pages = vma->obj->mm.pages; vma->flags |= I915_VMA_LOCAL_BIND; return 0; } @@ -84,6 +83,8 @@ mock_ppgtt(struct drm_i915_private *i915, ppgtt->base.insert_entries = mock_insert_entries; ppgtt->base.bind_vma = mock_bind_ppgtt; ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = mock_cleanup; return ppgtt; @@ -93,12 +94,6 @@ static int mock_bind_ggtt(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - int err; - - err = i915_get_ggtt_vma_pages(vma); - if (err) - return err; - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; return 0; } @@ -124,6 +119,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->base.insert_entries = mock_insert_entries; ggtt->base.bind_vma = mock_bind_ggtt; ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = mock_cleanup; i915_address_space_init(&ggtt->base, i915, "global"); diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c index 1cc5d2931753..cd6d2a16071f 100644 --- a/drivers/gpu/drm/i915/selftests/scatterlist.c +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -189,6 +189,20 @@ static unsigned int random(unsigned long n, return 1 + (prandom_u32_state(rnd) % 1024); } +static unsigned int random_page_size_pages(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + /* 4K, 64K, 2M */ + static unsigned int page_count[] = { + BIT(12) >> PAGE_SHIFT, + BIT(16) >> PAGE_SHIFT, + BIT(21) >> PAGE_SHIFT, + }; + + return page_count[(prandom_u32_state(rnd) % 3)]; +} + static inline bool page_contiguous(struct page *first, struct page *last, unsigned long npages) @@ -252,6 +266,7 @@ static const npages_fn_t npages_funcs[] = { grow, shrink, random, + random_page_size_pages, NULL, }; diff --git a/drivers/gpu/drm/imx/Makefile b/drivers/gpu/drm/imx/Makefile index 16ecef33e008..ab6c83caceb7 100644 --- a/drivers/gpu/drm/imx/Makefile +++ b/drivers/gpu/drm/imx/Makefile @@ -1,3 +1,4 @@ +# 
SPDX-License-Identifier: GPL-2.0 imxdrm-objs := imx-drm-core.o ipuv3-crtc.o ipuv3-plane.o diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h index f6dd64be9cd5..f0b7556c0857 100644 --- a/drivers/gpu/drm/imx/imx-drm.h +++ b/drivers/gpu/drm/imx/imx-drm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IMX_DRM_H_ #define _IMX_DRM_H_ diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h index 596b24ddbf65..e563ea17a827 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.h +++ b/drivers/gpu/drm/imx/ipuv3-plane.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IPUV3_PLANE_H__ #define __IPUV3_PLANE_H__ diff --git a/drivers/gpu/drm/lib/drm_random.c b/drivers/gpu/drm/lib/drm_random.c index a78c4b483e8d..eeb155826d27 100644 --- a/drivers/gpu/drm/lib/drm_random.c +++ b/drivers/gpu/drm/lib/drm_random.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/random.h> diff --git a/drivers/gpu/drm/lib/drm_random.h b/drivers/gpu/drm/lib/drm_random.h index a78644bea7f9..4a3e94dfa0c0 100644 --- a/drivers/gpu/drm/lib/drm_random.h +++ b/drivers/gpu/drm/lib/drm_random.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __DRM_RANDOM_H__ #define __DRM_RANDOM_H__ diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index e37b55a23a65..ce83c396a742 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 mediatek-drm-y := mtk_disp_color.o \ mtk_disp_ovl.o \ mtk_disp_rdma.o \ diff --git a/drivers/gpu/drm/mgag200/mgag200_reg.h b/drivers/gpu/drm/mgag200/mgag200_reg.h index 3ae442a64bd6..c096a9d6bcbc 100644 --- a/drivers/gpu/drm/mgag200/mgag200_reg.h +++ b/drivers/gpu/drm/mgag200/mgag200_reg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * MGA Millennium (MGA2064W) functions * MGA Mystique (MGA1064SG) functions diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 33008fa1be9b..92b3844202d2 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 ccflags-y := -Idrivers/gpu/drm/msm ccflags-$(CONFIG_DRM_MSM_DSI) += -Idrivers/gpu/drm/msm/dsi @@ -8,6 +9,7 @@ msm-y := \ adreno/a4xx_gpu.o \ adreno/a5xx_gpu.o \ adreno/a5xx_power.o \ + adreno/a5xx_preempt.o \ hdmi/hdmi.o \ hdmi/hdmi_audio.o \ hdmi/hdmi_bridge.o \ @@ -57,7 +59,8 @@ msm-y := \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ - msm_ringbuffer.o + msm_ringbuffer.o \ + msm_submitqueue.o msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 7791313405b5..4baef2738178 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu); static bool a3xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a3xx_idle(gpu); } @@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - 
if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -444,9 +444,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = msm_gpu_pm_suspend, .pm_resume = msm_gpu_pm_resume, .recover = a3xx_recover, - .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -492,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a3xx_registers; adreno_gpu->reg_offsets = a3xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 58341ef6f15b..8199a4b9f2fa 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a4xx_idle(gpu); } @@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -532,9 +532,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = a4xx_pm_suspend, .pm_resume = a4xx_pm_resume, .recover = a4xx_recover, - .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -574,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a4xx_registers; adreno_gpu->reg_offsets = a4xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 17c59d839e6f..a1f4eeeb73e2 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -26,8 +26,9 @@ static void a5xx_dump(struct msm_gpu *gpu); #define GPU_PAS_ID 13 -static int zap_shader_load_mdt(struct device *dev, const char *fwname) +static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) { + struct device *dev = &gpu->pdev->dev; const struct firmware *fw; struct device_node *np; struct resource r; @@ -55,10 +56,10 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) mem_size = resource_size(&r); /* Request the MDT file for the firmware */ - ret = request_firmware(&fw, fwname, dev); - if (ret) { + fw = adreno_request_fw(to_adreno_gpu(gpu), fwname); + if (IS_ERR(fw)) { DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname); - return ret; + return PTR_ERR(fw); } /* Figure out how much memory we need */ @@ -75,9 +76,26 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) goto out; } - /* Load the rest of the MDT */ - ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys, - mem_size); + /* + * Load the 
rest of the MDT + * + * Note that we could be dealing with two different paths, since + * with upstream linux-firmware it would be in a qcom/ subdir.. + * adreno_request_fw() handles this, but qcom_mdt_load() does + * not. But since we've already gotten thru adreno_request_fw() + * we know which of the two cases it is: + */ + if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) { + ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, + mem_region, mem_phys, mem_size); + } else { + char newname[strlen("qcom/") + strlen(fwname) + 1]; + + sprintf(newname, "qcom/%s", fwname); + + ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID, + mem_region, mem_phys, mem_size); + } if (ret) goto out; @@ -95,14 +113,65 @@ out: return ret; } +static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + uint32_t wptr; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + + spin_unlock_irqrestore(&ring->lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); + + /* Update HW if this is the current ring and we are not in preempt */ + if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); +} + static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + /* Enable local preemption for finegrain preemption */ + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x02); + + /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: @@ -120,16 +189,54 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } } + /* + * Write the render mode to NULL (0) to indicate to the CP that the IBs + * are done rendering - otherwise a lucky preemption would start + * replaying from the last checkpoint + */ + OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* Turn off IB level preemptions */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); + /* + * Execute a CACHE_FLUSH_TS event. 
This will ensure that the + * timestamp is written to the memory and then triggers the interrupt + */ OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); - OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence))); - OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence))); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno); - gpu->funcs->flush(gpu); + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + /* + * If dword[2:1] are non zero, they specify an address for the CP to + * write the value of dword[3] to on preemption complete. Write 0 to + * skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* Set bit 0 to trigger an interrupt on preempt complete */ + OUT_RING(ring, 0x01); + + a5xx_flush(gpu, ring); + + /* Check to see if we need to start preemption */ + a5xx_preempt_trigger(gpu); } static const struct { @@ -245,7 +352,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) static int a5xx_me_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_ME_INIT, 8); @@ -276,11 +383,54 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); + return a5xx_idle(gpu, ring) ? 0 : -EINVAL; +} + +static int a5xx_preempt_start(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct msm_ringbuffer *ring = gpu->rb[0]; + + if (gpu->nr_rings == 1) + return 0; + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x00); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x01); - return a5xx_idle(gpu) ? 0 : -EINVAL; + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x01); + OUT_RING(ring, 0x01); + + gpu->funcs->flush(gpu, ring); + + return a5xx_idle(gpu, ring) ? 
0 : -EINVAL; } + static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, const struct firmware *fw, u64 *iova) { @@ -381,7 +531,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) return -ENODEV; } - ret = zap_shader_load_mdt(&pdev->dev, adreno_gpu->info->zapfw); + ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw); loaded = !ret; @@ -396,6 +546,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ + A5XX_RBBM_INT_0_MASK_CP_SW | \ A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) @@ -536,13 +687,14 @@ static int a5xx_hw_init(struct msm_gpu *gpu) REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); - /* Load the GPMU firmware before starting the HW init */ - a5xx_gpmu_ucode_init(gpu); - ret = adreno_hw_init(gpu); if (ret) return ret; + a5xx_preempt_hw_init(gpu); + + a5xx_gpmu_ucode_init(gpu); + ret = a5xx_ucode_init(gpu); if (ret) return ret; @@ -565,11 +717,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * ticking correctly */ if (adreno_is_a530(adreno_gpu)) { - OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); - OUT_RING(gpu->rb, 0x0F); + OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); + OUT_RING(gpu->rb[0], 0x0F); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -582,11 +734,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) */ ret = a5xx_zap_shader_init(gpu); if (!ret) { - OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); - OUT_RING(gpu->rb, 0x00000000); + OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); + OUT_RING(gpu->rb[0], 0x00000000); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -595,6 +747,9 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); } + /* Last step - yield the ringbuffer */ + a5xx_preempt_start(gpu); + return 0; } @@ -625,6 +780,8 @@ static void a5xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); + a5xx_preempt_fini(gpu); + if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace); @@ -660,18 +817,27 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + if (ring != a5xx_gpu->cur_ring) { + WARN(1, "Tried to idle a non-current ringbuffer\n"); + return false; + } + /* wait for CP to drain ringbuffer: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, ring)) return false; if (spin_until(_a5xx_check_idle(gpu))) { - DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", gpu->name, __builtin_return_address(0), gpu_read(gpu, REG_A5XX_RBBM_STATUS), - gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); - + gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); return false; } @@ -802,9 +968,10 @@ static void a5xx_fault_detect_irq(struct 
msm_gpu *gpu) { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); - dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - gpu->funcs->last_fence(gpu), + dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + ring ? ring->id : -1, ring ? ring->seqno : 0, gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR), @@ -854,8 +1021,13 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu) if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) a5xx_gpmu_err_irq(gpu); - if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { + a5xx_preempt_trigger(gpu); msm_gpu_retire(gpu); + } + + if (status & A5XX_RBBM_INT_0_MASK_CP_SW) + a5xx_preempt_irq(gpu); return IRQ_HANDLED; } @@ -985,6 +1157,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m) } #endif +static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + return a5xx_gpu->cur_ring; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -992,9 +1172,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = a5xx_pm_suspend, .pm_resume = a5xx_pm_resume, .recover = a5xx_recover, - .last_fence = adreno_last_fence, .submit = a5xx_submit, - .flush = adreno_flush, + .flush = a5xx_flush, + .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1030,7 +1210,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); @@ -1039,5 +1219,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); + /* Set up the preemption specific bits and pieces for each ringbuffer */ + a5xx_preempt_init(gpu); + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index e94451685bf8..6fb8c2f9b9e4 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -35,10 +35,100 @@ struct a5xx_gpu { uint32_t gpmu_dwords; uint32_t lm_leakage; + + struct msm_ringbuffer *cur_ring; + struct msm_ringbuffer *next_ring; + + struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS]; + struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS]; + uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; + + atomic_t preempt_state; + struct timer_list preempt_timer; }; #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) +/* + * In order to do lockless preemption we use a simple state machine to progress + * through the process. + * + * PREEMPT_NONE - no preemption in progress. Next state START. + * PREEMPT_START - The trigger is evaulating if preemption is possible. 
Next + * states: TRIGGERED, NONE + * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next + * state: NONE. + * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next + * states: FAULTED, PENDING + * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger + * recovery. Next state: N/A + * PREEMPT_PENDING: Preemption complete interrupt fired - the callback is + * checking the success of the operation. Next state: FAULTED, NONE. + */ + +enum preempt_state { + PREEMPT_NONE = 0, + PREEMPT_START, + PREEMPT_ABORT, + PREEMPT_TRIGGERED, + PREEMPT_FAULTED, + PREEMPT_PENDING, +}; + +/* + * struct a5xx_preempt_record is a shared buffer between the microcode and the + * CPU to store the state for preemption. The record itself is much larger + * (64k) but most of that is used by the CP for storage. + * + * There is a preemption record assigned per ringbuffer. When the CPU triggers a + * preemption, it fills out the record with the useful information (wptr, ring + * base, etc) and the microcode uses that information to set up the CP following + * the preemption. When a ring is switched out, the CP will save the ringbuffer + * state back to the record. In this way, once the records are properly set up + * the CPU can quickly switch back and forth between ringbuffers by only + * updating a few registers (often only the wptr). + * + * These are the CPU aware registers in the record: + * @magic: Must always be 0x27C4BAFC + * @info: Type of the record - written 0 by the CPU, updated by the CP + * @data: Data field from SET_RENDER_MODE or a checkpoint. Written and used by + * the CP + * @cntl: Value of RB_CNTL written by CPU, save/restored by CP + * @rptr: Value of RB_RPTR written by CPU, save/restored by CP + * @wptr: Value of RB_WPTR written by CPU, save/restored by CP + * @rptr_addr: Value of RB_RPTR_ADDR written by CPU, save/restored by CP + * @rbase: Value of RB_BASE written by CPU, save/restored by CP + * @counter: GPU address of the storage area for the performance counters + */ +struct a5xx_preempt_record { + uint32_t magic; + uint32_t info; + uint32_t data; + uint32_t cntl; + uint32_t rptr; + uint32_t wptr; + uint64_t rptr_addr; + uint64_t rbase; + uint64_t counter; +}; + +/* Magic identifier for the preemption record */ +#define A5XX_PREEMPT_RECORD_MAGIC 0x27C4BAFCUL + +/* + * Even though the structure above is only a few bytes, we need a full 64k to + * store the entire preemption record from the CP + */ +#define A5XX_PREEMPT_RECORD_SIZE (64 * 1024) + +/* + * The preemption counter block is a storage area for the value of the + * preemption counters that are saved immediately before context switch. We + * append it on to the end of the allocation for the preemption record. 
+ */ +#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4) + + int a5xx_power_init(struct msm_gpu *gpu); void a5xx_gpmu_ucode_init(struct msm_gpu *gpu); @@ -55,7 +145,22 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } -bool a5xx_idle(struct msm_gpu *gpu); +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); +void a5xx_preempt_init(struct msm_gpu *gpu); +void a5xx_preempt_hw_init(struct msm_gpu *gpu); +void a5xx_preempt_trigger(struct msm_gpu *gpu); +void a5xx_preempt_irq(struct msm_gpu *gpu); +void a5xx_preempt_fini(struct msm_gpu *gpu); + +/* Return true if we are in a preempt state */ +static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) +{ + int preempt_state = atomic_read(&a5xx_gpu->preempt_state); + + return !(preempt_state == PREEMPT_NONE || + preempt_state == PREEMPT_ABORT); +} + #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 04aab1dcae2b..e5700bbf09dd 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; if (!a5xx_gpu->gpmu_dwords) return 0; @@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); - if (!a5xx_idle(gpu)) { + if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", gpu->name); return -EINVAL; @@ -264,7 +264,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) return; /* Get the firmware */ - if (request_firmware(&fw, adreno_gpu->info->gpmufw, drm->dev)) { + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->gpmufw); + if (IS_ERR(fw)) { DRM_ERROR("%s: Could not get GPMU firmware. GPMU will not be active\n", gpu->name); return; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c new file mode 100644 index 000000000000..40f4840ef98e --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -0,0 +1,305 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "msm_gem.h" +#include "a5xx_gpu.h" + +/* + * Try to transition the preemption state from old to new. Return + * true on success or false if the original state wasn't 'old' + */ +static inline bool try_preempt_state(struct a5xx_gpu *a5xx_gpu, + enum preempt_state old, enum preempt_state new) +{ + enum preempt_state cur = atomic_cmpxchg(&a5xx_gpu->preempt_state, + old, new); + + return (cur == old); +} + +/* + * Force the preemption state to the specified state. 
This is used in cases + * where the current state is known and won't change + */ +static inline void set_preempt_state(struct a5xx_gpu *gpu, + enum preempt_state new) +{ + /* + * preempt_state may be read by other cores trying to trigger a + * preemption or in the interrupt handler so barriers are needed + * before... + */ + smp_mb__before_atomic(); + atomic_set(&gpu->preempt_state, new); + /* ... and after*/ + smp_mb__after_atomic(); +} + +/* Write the most recent wptr for the given ring into the hardware */ +static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + unsigned long flags; + uint32_t wptr; + + if (!ring) + return; + + spin_lock_irqsave(&ring->lock, flags); + wptr = get_wptr(ring); + spin_unlock_irqrestore(&ring->lock, flags); + + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); +} + +/* Return the highest priority ringbuffer with something in it */ +static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) +{ + unsigned long flags; + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + bool empty; + struct msm_ringbuffer *ring = gpu->rb[i]; + + spin_lock_irqsave(&ring->lock, flags); + empty = (get_wptr(ring) == ring->memptrs->rptr); + spin_unlock_irqrestore(&ring->lock, flags); + + if (!empty) + return ring; + } + + return NULL; +} + +static void a5xx_preempt_timer(unsigned long data) +{ + struct a5xx_gpu *a5xx_gpu = (struct a5xx_gpu *) data; + struct msm_gpu *gpu = &a5xx_gpu->base.base; + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED)) + return; + + dev_err(dev->dev, "%s: preemption timed out\n", gpu->name); + queue_work(priv->wq, &gpu->recover_work); +} + +/* Try to trigger a preemption switch */ +void a5xx_preempt_trigger(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + unsigned long flags; + struct msm_ringbuffer *ring; + + if (gpu->nr_rings == 1) + return; + + /* + * Try to start preemption by moving from NONE to START. If + * unsuccessful, a preemption is already in flight + */ + if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START)) + return; + + /* Get the next ring to preempt to */ + ring = get_next_ring(gpu); + + /* + * If no ring is populated or the highest priority ring is the current + * one do nothing except to update the wptr to the latest and greatest + */ + if (!ring || (a5xx_gpu->cur_ring == ring)) { + /* + * Its possible that while a preemption request is in progress + * from an irq context, a user context trying to submit might + * fail to update the write pointer, because it determines + * that the preempt state is not PREEMPT_NONE. + * + * Close the race by introducing an intermediate + * state PREEMPT_ABORT to let the submit path + * know that the ringbuffer is not going to change + * and can safely update the write pointer. 
+ */ + + set_preempt_state(a5xx_gpu, PREEMPT_ABORT); + update_wptr(gpu, a5xx_gpu->cur_ring); + set_preempt_state(a5xx_gpu, PREEMPT_NONE); + return; + } + + /* Make sure the wptr doesn't update while we're in motion */ + spin_lock_irqsave(&ring->lock, flags); + a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); + spin_unlock_irqrestore(&ring->lock, flags); + + /* Set the address of the incoming preemption record */ + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, + REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, + a5xx_gpu->preempt_iova[ring->id]); + + a5xx_gpu->next_ring = ring; + + /* Start a timer to catch a stuck preemption */ + mod_timer(&a5xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000)); + + /* Set the preemption state to triggered */ + set_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED); + + /* Make sure everything is written before hitting the button */ + wmb(); + + /* And actually start the preemption */ + gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1); +} + +void a5xx_preempt_irq(struct msm_gpu *gpu) +{ + uint32_t status; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING)) + return; + + /* Delete the preemption watchdog timer */ + del_timer(&a5xx_gpu->preempt_timer); + + /* + * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before + * firing the interrupt, but there is a non zero chance of a hardware + * condition or a software race that could set it again before we have a + * chance to finish. If that happens, log and go for recovery + */ + status = gpu_read(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL); + if (unlikely(status)) { + set_preempt_state(a5xx_gpu, PREEMPT_FAULTED); + dev_err(dev->dev, "%s: Preemption failed to complete\n", + gpu->name); + queue_work(priv->wq, &gpu->recover_work); + return; + } + + a5xx_gpu->cur_ring = a5xx_gpu->next_ring; + a5xx_gpu->next_ring = NULL; + + update_wptr(gpu, a5xx_gpu->cur_ring); + + set_preempt_state(a5xx_gpu, PREEMPT_NONE); +} + +void a5xx_preempt_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + a5xx_gpu->preempt[i]->wptr = 0; + a5xx_gpu->preempt[i]->rptr = 0; + a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; + } + + /* Write a 0 to signal that we aren't switching pagetables */ + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0); + + /* Reset the preemption state */ + set_preempt_state(a5xx_gpu, PREEMPT_NONE); + + /* Always come up on rb 0 */ + a5xx_gpu->cur_ring = gpu->rb[0]; +} + +static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, + struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = &a5xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a5xx_preempt_record *ptr; + struct drm_gem_object *bo = NULL; + u64 iova = 0; + + ptr = msm_gem_kernel_new(gpu->dev, + A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE, + MSM_BO_UNCACHED, gpu->aspace, &bo, &iova); + + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + a5xx_gpu->preempt_bo[ring->id] = bo; + a5xx_gpu->preempt_iova[ring->id] = iova; + a5xx_gpu->preempt[ring->id] = ptr; + + /* Set up the defaults on the preemption record */ + + ptr->magic = A5XX_PREEMPT_RECORD_MAGIC; + ptr->info = 0; + ptr->data = 0; + ptr->cntl = 
MSM_GPU_RB_CNTL_DEFAULT; + ptr->rptr_addr = rbmemptr(ring, rptr); + ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE; + + return 0; +} + +void a5xx_preempt_fini(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + if (!a5xx_gpu->preempt_bo[i]) + continue; + + msm_gem_put_vaddr(a5xx_gpu->preempt_bo[i]); + + if (a5xx_gpu->preempt_iova[i]) + msm_gem_put_iova(a5xx_gpu->preempt_bo[i], gpu->aspace); + + drm_gem_object_unreference(a5xx_gpu->preempt_bo[i]); + a5xx_gpu->preempt_bo[i] = NULL; + } +} + +void a5xx_preempt_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + /* No preemption if we only have one ring */ + if (gpu->nr_rings <= 1) + return; + + for (i = 0; i < gpu->nr_rings; i++) { + if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) { + /* + * On any failure our adventure is over. Clean up and + * set nr_rings to 1 to force preemption off + */ + a5xx_preempt_fini(gpu); + gpu->nr_rings = 1; + + return; + } + } + + setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, + (unsigned long) a5xx_gpu); +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index c75c4df4bc39..05022ea2a007 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -125,51 +125,24 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; - struct adreno_platform_config *config; - struct adreno_rev rev; - const struct adreno_info *info; - struct msm_gpu *gpu = NULL; + struct msm_gpu *gpu = platform_get_drvdata(priv->gpu_pdev); + int ret; - if (!pdev) { + if (!gpu) { dev_err(dev->dev, "no adreno device\n"); return NULL; } - config = pdev->dev.platform_data; - rev = config->rev; - info = adreno_info(config->rev); - - if (!info) { - dev_warn(dev->dev, "Unknown GPU revision: %u.%u.%u.%u\n", - rev.core, rev.major, rev.minor, rev.patchid); + pm_runtime_get_sync(&pdev->dev); + mutex_lock(&dev->struct_mutex); + ret = msm_gpu_hw_init(gpu); + mutex_unlock(&dev->struct_mutex); + pm_runtime_put_sync(&pdev->dev); + if (ret) { + dev_err(dev->dev, "gpu hw init failed: %d\n", ret); return NULL; } - DBG("Found GPU: %u.%u.%u.%u", rev.core, rev.major, - rev.minor, rev.patchid); - - gpu = info->init(dev); - if (IS_ERR(gpu)) { - dev_warn(dev->dev, "failed to load adreno gpu\n"); - gpu = NULL; - /* not fatal */ - } - - if (gpu) { - int ret; - - pm_runtime_get_sync(&pdev->dev); - mutex_lock(&dev->struct_mutex); - ret = msm_gpu_hw_init(gpu); - mutex_unlock(&dev->struct_mutex); - pm_runtime_put_sync(&pdev->dev); - if (ret) { - dev_err(dev->dev, "gpu hw init failed: %d\n", ret); - gpu->funcs->destroy(gpu); - gpu = NULL; - } - } - return gpu; } @@ -282,6 +255,9 @@ static int adreno_get_pwrlevels(struct device *dev, static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; + const struct adreno_info *info; + struct drm_device *drm = dev_get_drvdata(master); + struct msm_gpu *gpu; u32 val; int ret; @@ -302,13 +278,39 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) return ret; dev->platform_data = &config; - set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev)); + set_gpu_pdev(drm, to_platform_device(dev)); + + info 
= adreno_info(config.rev); + + if (!info) { + dev_warn(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n", + config.rev.core, config.rev.major, + config.rev.minor, config.rev.patchid); + return -ENXIO; + } + + DBG("Found GPU: %u.%u.%u.%u", config.rev.core, config.rev.major, + config.rev.minor, config.rev.patchid); + + gpu = info->init(drm); + if (IS_ERR(gpu)) { + dev_warn(drm->dev, "failed to load adreno gpu\n"); + return PTR_ERR(gpu); + } + + dev_set_drvdata(dev, gpu); + return 0; } static void adreno_unbind(struct device *dev, struct device *master, void *data) { + struct msm_gpu *gpu = dev_get_drvdata(dev); + + gpu->funcs->pm_suspend(gpu); + gpu->funcs->destroy(gpu); + set_gpu_pdev(dev_get_drvdata(master), NULL); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index c8b4ac254bb5..e2ffecce59a3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -21,8 +21,6 @@ #include "msm_gem.h" #include "msm_mmu.h" -#define RB_SIZE SZ_32K -#define RB_BLKSIZE 32 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) { @@ -58,72 +56,181 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) return ret; } return -EINVAL; + case MSM_PARAM_NR_RINGS: + *value = gpu->nr_rings; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; } } +const struct firmware * +adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) +{ + struct drm_device *drm = adreno_gpu->base.dev; + const struct firmware *fw = NULL; + char newname[strlen("qcom/") + strlen(fwname) + 1]; + int ret; + + sprintf(newname, "qcom/%s", fwname); + + /* + * Try first to load from qcom/$fwfile using a direct load (to avoid + * a potential timeout waiting for usermode helper) + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_NEW)) { + + ret = request_firmware_direct(&fw, newname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s from new location\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_NEW; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + newname, ret); + return ERR_PTR(ret); + } + } + + /* + * Then try the legacy location without qcom/ prefix + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) { + + ret = request_firmware_direct(&fw, fwname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s from legacy location\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_LEGACY; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + fwname, ret); + return ERR_PTR(ret); + } + } + + /* + * Finally fall back to request_firmware() for cases where the + * usermode helper is needed (I think mainly android) + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_HELPER)) { + + ret = request_firmware(&fw, newname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s with helper\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_HELPER; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + newname, ret); + return ERR_PTR(ret); + } + } + + dev_err(drm->dev, "failed to load %s\n", fwname); + return ERR_PTR(-ENOENT); +} + +static int adreno_load_fw(struct adreno_gpu *adreno_gpu) +{ + const struct firmware *fw; + + if (adreno_gpu->pm4) + return 0; 
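/*
 * Caller-side sketch (illustrative only; "example_fw.mdt" is a placeholder
 * name, not taken from this patch): adreno_request_fw() hands the blob back
 * via ERR_PTR() instead of an int plus out-parameter, so callers test the
 * result with IS_ERR()/PTR_ERR() and release it with release_firmware(),
 * exactly as the surrounding adreno_load_fw() does for the PM4 and PFP
 * images.
 */
static int example_load_one_fw(struct adreno_gpu *adreno_gpu)
{
	const struct firmware *fw;

	fw = adreno_request_fw(adreno_gpu, "example_fw.mdt");
	if (IS_ERR(fw))
		return PTR_ERR(fw);

	/* ... consume fw->data / fw->size ... */
	release_firmware(fw);
	return 0;
}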
+ + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw); + if (IS_ERR(fw)) + return PTR_ERR(fw); + adreno_gpu->pm4 = fw; + + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw); + if (IS_ERR(fw)) { + release_firmware(adreno_gpu->pm4); + adreno_gpu->pm4 = NULL; + return PTR_ERR(fw); + } + adreno_gpu->pfp = fw; + + return 0; +} + int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret; + int ret, i; DBG("%s", gpu->name); - ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); - if (ret) { - gpu->rb_iova = 0; - dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); + ret = adreno_load_fw(adreno_gpu); + if (ret) return ret; - } - /* reset ringbuffer: */ - gpu->rb->cur = gpu->rb->start; + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (!ring) + continue; + + ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); + if (ret) { + ring->iova = 0; + dev_err(gpu->dev->dev, + "could not map ringbuffer %d: %d\n", i, ret); + return ret; + } + + ring->cur = ring->start; + ring->next = ring->start; - /* reset completed fence seqno: */ - adreno_gpu->memptrs->fence = gpu->fctx->completed_fence; - adreno_gpu->memptrs->rptr = 0; + /* reset completed fence seqno: */ + ring->memptrs->fence = ring->seqno; + ring->memptrs->rptr = 0; + } - /* Setup REG_CP_RB_CNTL: */ + /* + * Setup REG_CP_RB_CNTL. The same value is used across targets (with + * the excpetion of A430 that disables the RPTR shadow) - the cacluation + * for the ringbuffer size and block size is moved to msm_gpu.h for the + * pre-processor to deal with and the A430 variant is ORed in here + */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - /* size is log2(quad-words): */ - AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + MSM_GPU_RB_CNTL_DEFAULT | + (adreno_is_a430(adreno_gpu) ? 
AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - /* Setup ringbuffer address: */ + /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); + REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); if (!adreno_is_a430(adreno_gpu)) { adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, REG_ADRENO_CP_RB_RPTR_ADDR_HI, - rbmemptr(adreno_gpu, rptr)); + rbmemptr(gpu->rb[0], rptr)); } return 0; } -static uint32_t get_wptr(struct msm_ringbuffer *ring) -{ - return ring->cur - ring->start; -} - /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ -static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, + struct msm_ringbuffer *ring) { if (adreno_is_a430(adreno_gpu)) - return adreno_gpu->memptrs->rptr = adreno_gpu_read( + return ring->memptrs->rptr = adreno_gpu_read( adreno_gpu, REG_ADRENO_CP_RB_RPTR); else - return adreno_gpu->memptrs->rptr; + return ring->memptrs->rptr; } -uint32_t adreno_last_fence(struct msm_gpu *gpu) +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - return adreno_gpu->memptrs->fence; + return gpu->rb[0]; } void adreno_recover(struct msm_gpu *gpu) @@ -149,7 +256,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned i; for (i = 0; i < submit->nr_cmds; i++) { @@ -164,7 +271,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_BUF: OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); - OUT_RING(ring, submit->cmd[i].iova); + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, submit->cmd[i].size); OUT_PKT2(ring); break; @@ -172,7 +279,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { /* Flush HLSQ lazy updates to make sure there is nothing @@ -188,8 +295,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(adreno_gpu, fence)); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ OUT_PKT3(ring, CP_INTERRUPT, 1); @@ -215,20 +322,23 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } #endif - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } -void adreno_flush(struct msm_gpu *gpu) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + /* * Mask wptr value that we calculate to fit in the HW range. 
This is * to account for the possibility that the last command fit exactly into * the ringbuffer and rb->next hasn't wrapped to zero yet */ - wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); + wptr = get_wptr(ring); /* ensure writes to ringbuffer have hit system memory: */ mb(); @@ -236,17 +346,19 @@ void adreno_flush(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); } -bool adreno_idle(struct msm_gpu *gpu) +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr = get_wptr(ring); /* wait for CP to drain ringbuffer: */ - if (!spin_until(get_rptr(adreno_gpu) == wptr)) + if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) return true; /* TODO maybe we need to reset GPU here to recover from hang? */ - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n", + gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr); + return false; } @@ -261,10 +373,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->fctx->last_fence); - seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + seq_printf(m, "rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, ring->seqno); + + seq_printf(m, " rptr: %d\n", + get_rptr(adreno_gpu, ring)); + seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); + } /* dump these out in a form that can be parsed by demsm: */ seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); @@ -290,16 +408,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) void adreno_dump_info(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int i; printk("revision: %d (%d.%d.%d.%d)\n", adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->fctx->last_fence); - printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + printk("rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, + ring->seqno); + + printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); + printk("rb wptr: %d\n", get_wptr(ring)); + } } /* would be nice to not have to duplicate the _show() stuff with printk(): */ @@ -322,28 +447,31 @@ void adreno_dump(struct msm_gpu *gpu) } } -static uint32_t ring_freewords(struct msm_gpu *gpu) +static uint32_t ring_freewords(struct msm_ringbuffer *ring) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = get_rptr(adreno_gpu); + struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); + uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; + /* Use ring->next to calculate free size */ + uint32_t wptr = ring->next - ring->start; + uint32_t rptr = get_rptr(adreno_gpu, ring); return (rptr + (size - 1) - wptr) % size; } -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { - if (spin_until(ring_freewords(gpu) >= ndwords)) - DRM_ERROR("%s: timeout waiting for 
ringbuffer space\n", gpu->name); + if (spin_until(ring_freewords(ring) >= ndwords)) + DRM_DEV_ERROR(ring->gpu->dev->dev, + "timeout waiting for space in ringubffer %d\n", + ring->id); } int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) + struct adreno_gpu *adreno_gpu, + const struct adreno_gpu_funcs *funcs, int nr_rings) { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu_config adreno_gpu_config = { 0 }; struct msm_gpu *gpu = &adreno_gpu->base; - int ret; adreno_gpu->funcs = funcs; adreno_gpu->info = adreno_info(config->rev); @@ -366,59 +494,20 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; - adreno_gpu_config.ringsz = RB_SIZE; + adreno_gpu_config.nr_rings = nr_rings; pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_enable(&pdev->dev); - ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, + return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, adreno_gpu->info->name, &adreno_gpu_config); - if (ret) - return ret; - - ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev); - if (ret) { - dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n", - adreno_gpu->info->pm4fw, ret); - return ret; - } - - ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev); - if (ret) { - dev_err(drm->dev, "failed to load %s PFP firmware: %d\n", - adreno_gpu->info->pfpfw, ret); - return ret; - } - - adreno_gpu->memptrs = msm_gem_kernel_new(drm, - sizeof(*adreno_gpu->memptrs), MSM_BO_UNCACHED, gpu->aspace, - &adreno_gpu->memptrs_bo, &adreno_gpu->memptrs_iova); - - if (IS_ERR(adreno_gpu->memptrs)) { - ret = PTR_ERR(adreno_gpu->memptrs); - adreno_gpu->memptrs = NULL; - dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); - } - - return ret; } void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - struct msm_gpu *gpu = &adreno_gpu->base; - - if (adreno_gpu->memptrs_bo) { - if (adreno_gpu->memptrs) - msm_gem_put_vaddr(adreno_gpu->memptrs_bo); - - if (adreno_gpu->memptrs_iova) - msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->aspace); - - drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo); - } release_firmware(adreno_gpu->pm4); release_firmware(adreno_gpu->pfp); - msm_gpu_cleanup(gpu); + msm_gpu_cleanup(&adreno_gpu->base); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 4d9165f29f43..28e3de6e5f94 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -2,7 +2,7 @@ * Copyright (C) 2013 Red Hat * Author: Rob Clark <robdclark@gmail.com> * - * Copyright (c) 2014 The Linux Foundation. All rights reserved. + * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published by @@ -82,14 +82,6 @@ struct adreno_info { const struct adreno_info *adreno_info(struct adreno_rev rev); -#define rbmemptr(adreno_gpu, member) \ - ((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member)) - -struct adreno_rbmemptrs { - volatile uint32_t rptr; - volatile uint32_t fence; -}; - struct adreno_gpu { struct msm_gpu base; struct adreno_rev rev; @@ -101,16 +93,30 @@ struct adreno_gpu { /* interesting register offsets to dump: */ const unsigned int *registers; + /* + * Are we loading fw from legacy path? Prior to addition + * of gpu firmware to linux-firmware, the fw files were + * placed in toplevel firmware directory, following qcom's + * android kernel. But linux-firmware preferred they be + * placed in a 'qcom' subdirectory. + * + * For backwards compatibility, we try first to load from + * the new path, using request_firmware_direct() to avoid + * any potential timeout waiting for usermode helper, then + * fall back to the old path (with direct load). And + * finally fall back to request_firmware() with the new + * path to allow the usermode helper. + */ + enum { + FW_LOCATION_UNKNOWN = 0, + FW_LOCATION_NEW, /* /lib/firmware/qcom/$fwfile */ + FW_LOCATION_LEGACY, /* /lib/firmware/$fwfile */ + FW_LOCATION_HELPER, + } fwloc; + /* firmware: */ const struct firmware *pm4, *pfp; - /* ringbuffer rptr/wptr: */ - // TODO should this be in msm_ringbuffer? I think it would be - // different for z180.. - struct adreno_rbmemptrs *memptrs; - struct drm_gem_object *memptrs_bo; - uint64_t memptrs_iova; - /* * Register offsets are different between some GPUs. * GPU specific offsets will be exported by GPU specific @@ -196,22 +202,25 @@ static inline int adreno_is_a530(struct adreno_gpu *gpu) } int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); +const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu, + const char *fwname); int adreno_hw_init(struct msm_gpu *gpu); -uint32_t adreno_last_fence(struct msm_gpu *gpu); void adreno_recover(struct msm_gpu *gpu); void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu); -bool adreno_idle(struct msm_gpu *gpu); +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu); -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); @@ -220,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu); static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -228,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static 
inline void OUT_PKT2(struct msm_ringbuffer *ring) { - adreno_wait_ring(ring->gpu, 1); + adreno_wait_ring(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } @@ -257,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val) static inline void OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, PKT4(regindx, cnt)); } static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } @@ -323,6 +332,11 @@ static inline void adreno_gpu_write64(struct adreno_gpu *gpu, adreno_gpu_write(gpu, hi, upper_32_bits(data)); } +static inline uint32_t get_wptr(struct msm_ringbuffer *ring) +{ + return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2); +} + /* * Given a register and a count, return a value to program into * REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index a5d75c9b3a73..65c1dfbbe019 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -14,7 +14,7 @@ #include "dsi_cfg.h" static const char * const dsi_v2_bus_clk_names[] = { - "core_mmss_clk", "iface_clk", "bus_clk", + "core_mmss", "iface", "bus", }; static const struct msm_dsi_config apq8064_dsi_cfg = { @@ -34,7 +34,7 @@ static const struct msm_dsi_config apq8064_dsi_cfg = { }; static const char * const dsi_6g_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", "core_mmss_clk", + "mdp_core", "iface", "bus", "core_mmss", }; static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { @@ -55,7 +55,7 @@ static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { }; static const char * const dsi_8916_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", + "mdp_core", "iface", "bus", }; static const struct msm_dsi_config msm8916_dsi_cfg = { @@ -99,7 +99,7 @@ static const struct msm_dsi_config msm8994_dsi_cfg = { * without it too. 
Figure out why it doesn't enable and uncomment below */ static const char * const dsi_8996_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", /* "core_mmss_clk", */ + "mdp_core", "iface", "bus", /* "core_mmss", */ }; static const struct msm_dsi_config msm8996_dsi_cfg = { diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index dbb31a014419..0f7324a686ca 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -248,7 +248,7 @@ disable_clks: clk_disable_unprepare(ahb_clk); disable_gdsc: regulator_disable(gdsc_reg); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); put_clk: clk_put(ahb_clk); put_gdsc: @@ -334,46 +334,46 @@ static int dsi_regulator_init(struct msm_dsi_host *msm_host) static int dsi_clk_init(struct msm_dsi_host *msm_host) { - struct device *dev = &msm_host->pdev->dev; + struct platform_device *pdev = msm_host->pdev; const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; const struct msm_dsi_config *cfg = cfg_hnd->cfg; int i, ret = 0; /* get bus clocks */ for (i = 0; i < cfg->num_bus_clks; i++) { - msm_host->bus_clks[i] = devm_clk_get(dev, + msm_host->bus_clks[i] = msm_clk_get(pdev, cfg->bus_clk_names[i]); if (IS_ERR(msm_host->bus_clks[i])) { ret = PTR_ERR(msm_host->bus_clks[i]); - pr_err("%s: Unable to get %s, ret = %d\n", + pr_err("%s: Unable to get %s clock, ret = %d\n", __func__, cfg->bus_clk_names[i], ret); goto exit; } } /* get link and source clocks */ - msm_host->byte_clk = devm_clk_get(dev, "byte_clk"); + msm_host->byte_clk = msm_clk_get(pdev, "byte"); if (IS_ERR(msm_host->byte_clk)) { ret = PTR_ERR(msm_host->byte_clk); - pr_err("%s: can't find dsi_byte_clk. ret=%d\n", + pr_err("%s: can't find dsi_byte clock. ret=%d\n", __func__, ret); msm_host->byte_clk = NULL; goto exit; } - msm_host->pixel_clk = devm_clk_get(dev, "pixel_clk"); + msm_host->pixel_clk = msm_clk_get(pdev, "pixel"); if (IS_ERR(msm_host->pixel_clk)) { ret = PTR_ERR(msm_host->pixel_clk); - pr_err("%s: can't find dsi_pixel_clk. ret=%d\n", + pr_err("%s: can't find dsi_pixel clock. ret=%d\n", __func__, ret); msm_host->pixel_clk = NULL; goto exit; } - msm_host->esc_clk = devm_clk_get(dev, "core_clk"); + msm_host->esc_clk = msm_clk_get(pdev, "core"); if (IS_ERR(msm_host->esc_clk)) { ret = PTR_ERR(msm_host->esc_clk); - pr_err("%s: can't find dsi_esc_clk. ret=%d\n", + pr_err("%s: can't find dsi_esc clock. ret=%d\n", __func__, ret); msm_host->esc_clk = NULL; goto exit; @@ -382,22 +382,22 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->byte_clk_src = clk_get_parent(msm_host->byte_clk); if (!msm_host->byte_clk_src) { ret = -ENODEV; - pr_err("%s: can't find byte_clk_src. ret=%d\n", __func__, ret); + pr_err("%s: can't find byte_clk clock. ret=%d\n", __func__, ret); goto exit; } msm_host->pixel_clk_src = clk_get_parent(msm_host->pixel_clk); if (!msm_host->pixel_clk_src) { ret = -ENODEV; - pr_err("%s: can't find pixel_clk_src. ret=%d\n", __func__, ret); + pr_err("%s: can't find pixel_clk clock. ret=%d\n", __func__, ret); goto exit; } if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) { - msm_host->src_clk = devm_clk_get(dev, "src_clk"); + msm_host->src_clk = msm_clk_get(pdev, "src"); if (IS_ERR(msm_host->src_clk)) { ret = PTR_ERR(msm_host->src_clk); - pr_err("%s: can't find dsi_src_clk. ret=%d\n", + pr_err("%s: can't find src clock. 
ret=%d\n", __func__, ret); msm_host->src_clk = NULL; goto exit; @@ -406,7 +406,7 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->esc_clk_src = clk_get_parent(msm_host->esc_clk); if (!msm_host->esc_clk_src) { ret = -ENODEV; - pr_err("%s: can't get esc_clk_src. ret=%d\n", + pr_err("%s: can't get esc clock parent. ret=%d\n", __func__, ret); goto exit; } @@ -414,7 +414,7 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->dsi_clk_src = clk_get_parent(msm_host->src_clk); if (!msm_host->dsi_clk_src) { ret = -ENODEV; - pr_err("%s: can't get dsi_clk_src. ret=%d\n", + pr_err("%s: can't get src clock parent. ret=%d\n", __func__, ret); } } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7c9bf91bc22b..790ca280cbfd 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -482,7 +482,7 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) goto fail; } - phy->ahb_clk = devm_clk_get(dev, "iface_clk"); + phy->ahb_clk = msm_clk_get(pdev, "iface"); if (IS_ERR(phy->ahb_clk)) { dev_err(dev, "%s: Unable to get ahb clk\n", __func__); ret = PTR_ERR(phy->ahb_clk); diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index e32a4a4f3797..7c72264101ff 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -150,46 +150,46 @@ static const struct edp_pixel_clk_div clk_divs[2][EDP_PIXEL_CLK_NUM] = { static int edp_clk_init(struct edp_ctrl *ctrl) { - struct device *dev = &ctrl->pdev->dev; + struct platform_device *pdev = ctrl->pdev; int ret; - ctrl->aux_clk = devm_clk_get(dev, "core_clk"); + ctrl->aux_clk = msm_clk_get(pdev, "core"); if (IS_ERR(ctrl->aux_clk)) { ret = PTR_ERR(ctrl->aux_clk); - pr_err("%s: Can't find aux_clk, %d\n", __func__, ret); + pr_err("%s: Can't find core clock, %d\n", __func__, ret); ctrl->aux_clk = NULL; return ret; } - ctrl->pixel_clk = devm_clk_get(dev, "pixel_clk"); + ctrl->pixel_clk = msm_clk_get(pdev, "pixel"); if (IS_ERR(ctrl->pixel_clk)) { ret = PTR_ERR(ctrl->pixel_clk); - pr_err("%s: Can't find pixel_clk, %d\n", __func__, ret); + pr_err("%s: Can't find pixel clock, %d\n", __func__, ret); ctrl->pixel_clk = NULL; return ret; } - ctrl->ahb_clk = devm_clk_get(dev, "iface_clk"); + ctrl->ahb_clk = msm_clk_get(pdev, "iface"); if (IS_ERR(ctrl->ahb_clk)) { ret = PTR_ERR(ctrl->ahb_clk); - pr_err("%s: Can't find ahb_clk, %d\n", __func__, ret); + pr_err("%s: Can't find iface clock, %d\n", __func__, ret); ctrl->ahb_clk = NULL; return ret; } - ctrl->link_clk = devm_clk_get(dev, "link_clk"); + ctrl->link_clk = msm_clk_get(pdev, "link"); if (IS_ERR(ctrl->link_clk)) { ret = PTR_ERR(ctrl->link_clk); - pr_err("%s: Can't find link_clk, %d\n", __func__, ret); + pr_err("%s: Can't find link clock, %d\n", __func__, ret); ctrl->link_clk = NULL; return ret; } /* need mdp core clock to receive irq */ - ctrl->mdp_core_clk = devm_clk_get(dev, "mdp_core_clk"); + ctrl->mdp_core_clk = msm_clk_get(pdev, "mdp_core"); if (IS_ERR(ctrl->mdp_core_clk)) { ret = PTR_ERR(ctrl->mdp_core_clk); - pr_err("%s: Can't find mdp_core_clk, %d\n", __func__, ret); + pr_err("%s: Can't find mdp_core clock, %d\n", __func__, ret); ctrl->mdp_core_clk = NULL; return ret; } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 17e069a133a4..e63dc0fb55f8 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -208,7 +208,7 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) 
for (i = 0; i < config->hpd_clk_cnt; i++) { struct clk *clk; - clk = devm_clk_get(&pdev->dev, config->hpd_clk_names[i]); + clk = msm_clk_get(pdev, config->hpd_clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(&pdev->dev, "failed to get hpd clk: %s (%d)\n", @@ -228,7 +228,7 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) for (i = 0; i < config->pwr_clk_cnt; i++) { struct clk *clk; - clk = devm_clk_get(&pdev->dev, config->pwr_clk_names[i]); + clk = msm_clk_get(pdev, config->pwr_clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(&pdev->dev, "failed to get pwr clk: %s (%d)\n", @@ -361,7 +361,7 @@ static const char *hpd_reg_names_none[] = {}; static struct hdmi_platform_config hdmi_tx_8660_config; static const char *hpd_reg_names_8960[] = {"core-vdda", "hdmi-mux"}; -static const char *hpd_clk_names_8960[] = {"core_clk", "master_iface_clk", "slave_iface_clk"}; +static const char *hpd_clk_names_8960[] = {"core", "master_iface", "slave_iface"}; static struct hdmi_platform_config hdmi_tx_8960_config = { HDMI_CFG(hpd_reg, 8960), @@ -370,8 +370,8 @@ static struct hdmi_platform_config hdmi_tx_8960_config = { static const char *pwr_reg_names_8x74[] = {"core-vdda", "core-vcc"}; static const char *hpd_reg_names_8x74[] = {"hpd-gdsc", "hpd-5v"}; -static const char *pwr_clk_names_8x74[] = {"extp_clk", "alt_iface_clk"}; -static const char *hpd_clk_names_8x74[] = {"iface_clk", "core_clk", "mdp_core_clk"}; +static const char *pwr_clk_names_8x74[] = {"extp", "alt_iface"}; +static const char *hpd_clk_names_8x74[] = {"iface", "core", "mdp_core"}; static unsigned long hpd_clk_freq_8x74[] = {0, 19200000, 0}; static struct hdmi_platform_config hdmi_tx_8974_config = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c index 534ce5b49781..5e631392dc85 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c @@ -48,7 +48,7 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy) for (i = 0; i < cfg->num_clks; i++) { struct clk *clk; - clk = devm_clk_get(dev, cfg->clk_names[i]); + clk = msm_clk_get(phy->pdev, cfg->clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(dev, "failed to get phy clock: %s (%d)\n", diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c index e6ee6b745ab7..0980da8ec966 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c @@ -48,7 +48,7 @@ static const char * const hdmi_phy_8960_reg_names[] = { }; static const char * const hdmi_phy_8960_clk_names[] = { - "slave_iface_clk", + "slave_iface", }; const struct hdmi_phy_cfg msm_hdmi_phy_8960_cfg = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index 1fb7645cc721..0df504c61833 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -758,9 +758,7 @@ static const char * const hdmi_phy_8996_reg_names[] = { }; static const char * const hdmi_phy_8996_clk_names[] = { - "mmagic_iface_clk", - "iface_clk", - "ref_clk", + "iface", "ref", }; const struct hdmi_phy_cfg msm_hdmi_phy_8996_cfg = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c index c4a61e537851..4a8b8468586a 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c @@ -41,8 +41,7 @@ static const char * const hdmi_phy_8x74_reg_names[] = { }; static const char * const hdmi_phy_8x74_clk_names[] = { - 
"iface_clk", - "alt_iface_clk" + "iface", "alt_iface" }; const struct hdmi_phy_cfg msm_hdmi_phy_8x74_cfg = { diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 47fa2aba1983..14bd3bd3e040 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -290,6 +290,9 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc, if (WARN_ON(!mdp4_crtc->enabled)) return; + /* Disable/save vblank irq handling before power is disabled */ + drm_crtc_vblank_off(crtc); + mdp_irq_unregister(&mdp4_kms->base, &mdp4_crtc->err); mdp4_disable(mdp4_kms); @@ -308,6 +311,10 @@ static void mdp4_crtc_atomic_enable(struct drm_crtc *crtc, return; mdp4_enable(mdp4_kms); + + /* Restore vblank irq handling after power is enabled */ + drm_crtc_vblank_on(crtc); + mdp_irq_register(&mdp4_kms->base, &mdp4_crtc->err); crtc_flush(crtc); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index c2bdad88447e..824067d2d427 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -83,6 +83,8 @@ const struct mdp5_cfg_hw msm8x74v1_config = { .caps = MDP_LM_CAP_WB }, }, .nb_stages = 5, + .max_width = 2048, + .max_height = 0xFFFF, }, .dspp = { .count = 3, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c index 60790df91bfa..1abc7f5c345c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c @@ -224,7 +224,7 @@ int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder, mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_LOWER, MDP5_SPLIT_DPL_LOWER_SMART_PANEL); mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_EN, 1); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 6fcb58ab718c..e414850dbbda 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -55,18 +55,23 @@ struct mdp5_crtc { struct completion pp_completion; + bool lm_cursor_enabled; + struct { /* protect REG_MDP5_LM_CURSOR* registers and cursor scanout_bo*/ spinlock_t lock; /* current cursor being scanned out: */ struct drm_gem_object *scanout_bo; + uint64_t iova; uint32_t width, height; uint32_t x, y; } cursor; }; #define to_mdp5_crtc(x) container_of(x, struct mdp5_crtc, base) +static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc); + static struct mdp5_kms *get_kms(struct drm_crtc *crtc) { struct msm_drm_private *priv = crtc->dev->dev_private; @@ -114,6 +119,8 @@ static u32 crtc_flush_all(struct drm_crtc *crtc) return 0; drm_atomic_crtc_for_each_plane(plane, crtc) { + if (!plane->state->visible) + continue; flush_mask |= mdp5_plane_get_flush(plane); } @@ -242,6 +249,9 @@ static void blend_setup(struct drm_crtc *crtc) drm_atomic_crtc_for_each_plane(plane, crtc) { enum mdp5_pipe right_pipe; + if (!plane->state->visible) + continue; + pstate = to_mdp5_plane_state(plane->state); pstates[pstate->stage] = pstate; stage[pstate->stage][PIPE_LEFT] = mdp5_plane_pipe(plane); @@ -422,11 +432,14 @@ static void mdp5_crtc_atomic_disable(struct drm_crtc *crtc, if (WARN_ON(!mdp5_crtc->enabled)) return; + /* Disable/save vblank irq handling before power is disabled */ + drm_crtc_vblank_off(crtc); + if (mdp5_cstate->cmd_mode) mdp_irq_unregister(&mdp5_kms->base, &mdp5_crtc->pp_done); mdp_irq_unregister(&mdp5_kms->base, 
&mdp5_crtc->err); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); mdp5_crtc->enabled = false; } @@ -446,6 +459,29 @@ static void mdp5_crtc_atomic_enable(struct drm_crtc *crtc, pm_runtime_get_sync(dev); + if (mdp5_crtc->lm_cursor_enabled) { + /* + * Restore LM cursor state, as it might have been lost + * with suspend: + */ + if (mdp5_crtc->cursor.iova) { + unsigned long flags; + + spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); + mdp5_crtc_restore_cursor(crtc); + spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); + + mdp5_ctl_set_cursor(mdp5_cstate->ctl, + &mdp5_cstate->pipeline, 0, true); + } else { + mdp5_ctl_set_cursor(mdp5_cstate->ctl, + &mdp5_cstate->pipeline, 0, false); + } + } + + /* Restore vblank irq handling after power is enabled */ + drm_crtc_vblank_on(crtc); + mdp5_crtc_mode_set_nofb(crtc); mdp_irq_register(&mdp5_kms->base, &mdp5_crtc->err); @@ -580,6 +616,9 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, DBG("%s: check", crtc->name); drm_atomic_crtc_state_for_each_plane_state(plane, pstate, state) { + if (!pstate->visible) + continue; + pstates[cnt].plane = plane; pstates[cnt].state = to_mdp5_plane_state(pstate); @@ -723,6 +762,50 @@ static void get_roi(struct drm_crtc *crtc, uint32_t *roi_w, uint32_t *roi_h) mdp5_crtc->cursor.y); } +static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc) +{ + struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); + struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); + struct mdp5_kms *mdp5_kms = get_kms(crtc); + const enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL; + uint32_t blendcfg, stride; + uint32_t x, y, width, height; + uint32_t roi_w, roi_h; + int lm; + + assert_spin_locked(&mdp5_crtc->cursor.lock); + + lm = mdp5_cstate->pipeline.mixer->lm; + + x = mdp5_crtc->cursor.x; + y = mdp5_crtc->cursor.y; + width = mdp5_crtc->cursor.width; + height = mdp5_crtc->cursor.height; + + stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0); + + get_roi(crtc, &roi_w, &roi_h); + + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), + MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_IMG_SIZE(lm), + MDP5_LM_CURSOR_IMG_SIZE_SRC_H(height) | + MDP5_LM_CURSOR_IMG_SIZE_SRC_W(width)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), + MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | + MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(lm), + MDP5_LM_CURSOR_START_XY_Y_START(y) | + MDP5_LM_CURSOR_START_XY_X_START(x)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BASE_ADDR(lm), + mdp5_crtc->cursor.iova); + + blendcfg = MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_EN; + blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_ALPHA_SEL(cur_alpha); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BLEND_CONFIG(lm), blendcfg); +} + static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file, uint32_t handle, uint32_t width, uint32_t height) @@ -735,16 +818,18 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct platform_device *pdev = mdp5_kms->pdev; struct msm_kms *kms = &mdp5_kms->base.base; struct drm_gem_object *cursor_bo, *old_bo = NULL; - uint32_t blendcfg, stride; - uint64_t cursor_addr; struct mdp5_ctl *ctl; - int ret, lm; - enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL; + int ret; uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); - uint32_t roi_w, roi_h; bool cursor_enable = true; unsigned long flags; + if (!mdp5_crtc->lm_cursor_enabled) { + 
dev_warn(dev->dev, + "cursor_set is deprecated with cursor planes\n"); + return -EINVAL; + } + if ((width > CURSOR_WIDTH) || (height > CURSOR_HEIGHT)) { dev_err(dev->dev, "bad cursor size: %dx%d\n", width, height); return -EINVAL; @@ -761,6 +846,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!handle) { DBG("Cursor off"); cursor_enable = false; + mdp5_crtc->cursor.iova = 0; pm_runtime_get_sync(&pdev->dev); goto set_cursor; } @@ -769,13 +855,11 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!cursor_bo) return -ENOENT; - ret = msm_gem_get_iova(cursor_bo, kms->aspace, &cursor_addr); + ret = msm_gem_get_iova(cursor_bo, kms->aspace, + &mdp5_crtc->cursor.iova); if (ret) return -EINVAL; - lm = mdp5_cstate->pipeline.mixer->lm; - stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0); - pm_runtime_get_sync(&pdev->dev); spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); @@ -785,27 +869,10 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, mdp5_crtc->cursor.width = width; mdp5_crtc->cursor.height = height; - get_roi(crtc, &roi_w, &roi_h); - - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), - MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_IMG_SIZE(lm), - MDP5_LM_CURSOR_IMG_SIZE_SRC_H(height) | - MDP5_LM_CURSOR_IMG_SIZE_SRC_W(width)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), - MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | - MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BASE_ADDR(lm), cursor_addr); - - blendcfg = MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_EN; - blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_ALPHA_SEL(cur_alpha); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BLEND_CONFIG(lm), blendcfg); + mdp5_crtc_restore_cursor(crtc); spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); - pm_runtime_put_autosuspend(&pdev->dev); - set_cursor: ret = mdp5_ctl_set_cursor(ctl, pipeline, 0, cursor_enable); if (ret) { @@ -817,7 +884,7 @@ set_cursor: crtc_flush(crtc, flush_mask); end: - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); if (old_bo) { drm_flip_work_queue(&mdp5_crtc->unref_cursor_work, old_bo); /* enable vblank to complete cursor work: */ @@ -831,12 +898,18 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) struct mdp5_kms *mdp5_kms = get_kms(crtc); struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); - uint32_t lm = mdp5_cstate->pipeline.mixer->lm; uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); + struct drm_device *dev = crtc->dev; uint32_t roi_w; uint32_t roi_h; unsigned long flags; + if (!mdp5_crtc->lm_cursor_enabled) { + dev_warn(dev->dev, + "cursor_move is deprecated with cursor planes\n"); + return -EINVAL; + } + /* don't support LM cursors when we we have source split enabled */ if (mdp5_cstate->pipeline.r_mixer) return -EINVAL; @@ -853,17 +926,12 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) pm_runtime_get_sync(&mdp5_kms->pdev->dev); spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), - MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | - MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(lm), - MDP5_LM_CURSOR_START_XY_Y_START(y) | - MDP5_LM_CURSOR_START_XY_X_START(x)); + mdp5_crtc_restore_cursor(crtc); spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); crtc_flush(crtc, flush_mask); - 
pm_runtime_put_autosuspend(&mdp5_kms->pdev->dev); + pm_runtime_put_sync(&mdp5_kms->pdev->dev); return 0; } @@ -943,16 +1011,6 @@ static const struct drm_crtc_funcs mdp5_crtc_funcs = { .atomic_print_state = mdp5_crtc_atomic_print_state, }; -static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { - .set_config = drm_atomic_helper_set_config, - .destroy = mdp5_crtc_destroy, - .page_flip = drm_atomic_helper_page_flip, - .reset = mdp5_crtc_reset, - .atomic_duplicate_state = mdp5_crtc_duplicate_state, - .atomic_destroy_state = mdp5_crtc_destroy_state, - .atomic_print_state = mdp5_crtc_atomic_print_state, -}; - static const struct drm_crtc_helper_funcs mdp5_crtc_helper_funcs = { .mode_set_nofb = mdp5_crtc_mode_set_nofb, .atomic_check = mdp5_crtc_atomic_check, @@ -1121,12 +1179,10 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, mdp5_crtc->err.irq = mdp5_crtc_err_irq; mdp5_crtc->pp_done.irq = mdp5_crtc_pp_done_irq; - if (cursor_plane) - drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, - &mdp5_crtc_no_lm_cursor_funcs, NULL); - else - drm_crtc_init_with_planes(dev, crtc, plane, NULL, - &mdp5_crtc_funcs, NULL); + mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true; + + drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, + &mdp5_crtc_funcs, NULL); drm_flip_work_init(&mdp5_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c index 5b851380d3f2..36ad3cbe5f79 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c @@ -384,7 +384,7 @@ int mdp5_vid_encoder_set_split_display(struct drm_encoder *encoder, mdp5_ctl_pair(mdp5_encoder->ctl, mdp5_slave_enc->ctl, true); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index bb5deb00c899..280e368bc9bb 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -54,7 +54,7 @@ void mdp5_irq_preinstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, 0xffffffff); mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } int mdp5_irq_postinstall(struct msm_kms *kms) @@ -72,7 +72,7 @@ int mdp5_irq_postinstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp_irq_register(mdp_kms, error_handler); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } @@ -84,7 +84,7 @@ void mdp5_irq_uninstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } irqreturn_t mdp5_irq(struct msm_kms *kms) @@ -119,7 +119,7 @@ int mdp5_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc) pm_runtime_get_sync(dev); mdp_update_vblank_mask(to_mdp_kms(kms), mdp5_crtc_vblank(crtc), true); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } @@ -132,5 +132,5 @@ void mdp5_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc) pm_runtime_get_sync(dev); mdp_update_vblank_mask(to_mdp_kms(kms), mdp5_crtc_vblank(crtc), false); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index 7e829a8d1cb1..3e9bba4d6624 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ 
b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -125,7 +125,7 @@ static void mdp5_complete_commit(struct msm_kms *kms, struct drm_atomic_state *s if (mdp5_kms->smp) mdp5_smp_complete_commit(mdp5_kms->smp, &mdp5_kms->state->smp); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } static void mdp5_wait_for_crtc_commit_done(struct msm_kms *kms, @@ -496,12 +496,12 @@ static void read_mdp_hw_revision(struct mdp5_kms *mdp5_kms, pm_runtime_get_sync(dev); version = mdp5_read(mdp5_kms, REG_MDP5_HW_VERSION); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); *major = FIELD(version, MDP5_HW_VERSION_MAJOR); *minor = FIELD(version, MDP5_HW_VERSION_MINOR); - DBG("MDP5 version v%d.%d", *major, *minor); + dev_info(dev, "MDP5 version v%d.%d", *major, *minor); } static int get_clk(struct platform_device *pdev, struct clk **clkp, @@ -683,7 +683,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) aspace = NULL;; } - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); ret = modeset_init(mdp5_kms); if (ret) { diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c index 2bfac3712685..ff52c49095f9 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c @@ -17,19 +17,20 @@ #include "mdp5_kms.h" -struct mdp5_hw_pipe *mdp5_pipe_assign(struct drm_atomic_state *s, - struct drm_plane *plane, uint32_t caps, uint32_t blkcfg) +int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, + uint32_t caps, uint32_t blkcfg, + struct mdp5_hw_pipe **hwpipe, + struct mdp5_hw_pipe **r_hwpipe) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); struct mdp5_state *state; struct mdp5_hw_pipe_state *old_state, *new_state; - struct mdp5_hw_pipe *hwpipe = NULL; - int i; + int i, j; state = mdp5_get_state(s); if (IS_ERR(state)) - return ERR_CAST(state); + return PTR_ERR(state); /* grab old_state after mdp5_get_state(), since now we hold lock: */ old_state = &mdp5_kms->state->hwpipe; @@ -64,31 +65,67 @@ struct mdp5_hw_pipe *mdp5_pipe_assign(struct drm_atomic_state *s, /* possible candidate, take the one with the * fewest unneeded caps bits set: */ - if (!hwpipe || (hweight_long(cur->caps & ~caps) < - hweight_long(hwpipe->caps & ~caps))) - hwpipe = cur; + if (!(*hwpipe) || (hweight_long(cur->caps & ~caps) < + hweight_long((*hwpipe)->caps & ~caps))) { + bool r_found = false; + + if (r_hwpipe) { + for (j = i + 1; j < mdp5_kms->num_hwpipes; + j++) { + struct mdp5_hw_pipe *r_cur = + mdp5_kms->hwpipes[j]; + + /* reject different types of hwpipes */ + if (r_cur->caps != cur->caps) + continue; + + /* respect priority, eg. 
VIG0 > VIG1 */ + if (cur->pipe > r_cur->pipe) + continue; + + *r_hwpipe = r_cur; + r_found = true; + break; + } + } + + if (!r_hwpipe || r_found) + *hwpipe = cur; + } } - if (!hwpipe) - return ERR_PTR(-ENOMEM); + if (!(*hwpipe)) + return -ENOMEM; + + if (r_hwpipe && !(*r_hwpipe)) + return -ENOMEM; if (mdp5_kms->smp) { int ret; - DBG("%s: alloc SMP blocks", hwpipe->name); + /* We don't support SMP and 2 hwpipes/plane together */ + WARN_ON(r_hwpipe); + + DBG("%s: alloc SMP blocks", (*hwpipe)->name); ret = mdp5_smp_assign(mdp5_kms->smp, &state->smp, - hwpipe->pipe, blkcfg); + (*hwpipe)->pipe, blkcfg); if (ret) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - hwpipe->blkcfg = blkcfg; + (*hwpipe)->blkcfg = blkcfg; } DBG("%s: assign to plane %s for caps %x", - hwpipe->name, plane->name, caps); - new_state->hwpipe_to_plane[hwpipe->idx] = plane; + (*hwpipe)->name, plane->name, caps); + new_state->hwpipe_to_plane[(*hwpipe)->idx] = plane; - return hwpipe; + if (r_hwpipe) { + DBG("%s: assign to right of plane %s for caps %x", + (*r_hwpipe)->name, plane->name, caps); + new_state->hwpipe_to_plane[(*r_hwpipe)->idx] = plane; + } + + return 0; } void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h index 924c3e6f9517..bb2b0ac7aa2b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h @@ -44,9 +44,10 @@ struct mdp5_hw_pipe_state { struct drm_plane *hwpipe_to_plane[SSPP_MAX]; }; -struct mdp5_hw_pipe *__must_check -mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, - uint32_t caps, uint32_t blkcfg); +int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, + uint32_t caps, uint32_t blkcfg, + struct mdp5_hw_pipe **hwpipe, + struct mdp5_hw_pipe **r_hwpipe); void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 4b22ac3413a1..be50445f9901 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -31,15 +31,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_rect *src, struct drm_rect *dest); -static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx); - static struct mdp5_kms *get_kms(struct drm_plane *plane) { struct msm_drm_private *priv = plane->dev->dev_private; @@ -254,18 +245,6 @@ static const struct drm_plane_funcs mdp5_plane_funcs = { .atomic_print_state = mdp5_plane_atomic_print_state, }; -static const struct drm_plane_funcs mdp5_cursor_plane_funcs = { - .update_plane = mdp5_update_cursor_plane_legacy, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = mdp5_plane_destroy, - .atomic_set_property = mdp5_plane_atomic_set_property, - .atomic_get_property = mdp5_plane_atomic_get_property, - .reset = mdp5_plane_reset, - .atomic_duplicate_state = mdp5_plane_duplicate_state, - .atomic_destroy_state = mdp5_plane_destroy_state, - .atomic_print_state = mdp5_plane_atomic_print_state, -}; - static int mdp5_plane_prepare_fb(struct drm_plane *plane, struct 
drm_plane_state *new_state) { @@ -414,31 +393,30 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, struct mdp5_hw_pipe *old_hwpipe = mdp5_state->hwpipe; struct mdp5_hw_pipe *old_right_hwpipe = mdp5_state->r_hwpipe; - - mdp5_state->hwpipe = mdp5_pipe_assign(state->state, - plane, caps, blkcfg); - if (IS_ERR(mdp5_state->hwpipe)) { - DBG("%s: failed to assign hwpipe!", plane->name); - return PTR_ERR(mdp5_state->hwpipe); + struct mdp5_hw_pipe *new_hwpipe = NULL; + struct mdp5_hw_pipe *new_right_hwpipe = NULL; + + ret = mdp5_pipe_assign(state->state, plane, caps, + blkcfg, &new_hwpipe, + need_right_hwpipe ? + &new_right_hwpipe : NULL); + if (ret) { + DBG("%s: failed to assign hwpipe(s)!", + plane->name); + return ret; } - if (need_right_hwpipe) { - mdp5_state->r_hwpipe = - mdp5_pipe_assign(state->state, plane, - caps, blkcfg); - if (IS_ERR(mdp5_state->r_hwpipe)) { - DBG("%s: failed to assign right hwpipe", - plane->name); - return PTR_ERR(mdp5_state->r_hwpipe); - } - } else { + mdp5_state->hwpipe = new_hwpipe; + if (need_right_hwpipe) + mdp5_state->r_hwpipe = new_right_hwpipe; + else /* * set it to NULL so that the driver knows we * don't have a right hwpipe when committing a * new state */ mdp5_state->r_hwpipe = NULL; - } + mdp5_pipe_release(state->state, old_hwpipe); mdp5_pipe_release(state->state, old_right_hwpipe); @@ -487,11 +465,98 @@ static void mdp5_plane_atomic_update(struct drm_plane *plane, } } +static int mdp5_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct mdp5_plane_state *mdp5_state = to_mdp5_plane_state(state); + struct drm_crtc_state *crtc_state; + struct drm_rect clip; + int min_scale, max_scale; + int ret; + + crtc_state = drm_atomic_get_existing_crtc_state(state->state, + state->crtc); + if (WARN_ON(!crtc_state)) + return -EINVAL; + + if (!crtc_state->active) + return -EINVAL; + + mdp5_state = to_mdp5_plane_state(state); + + /* don't use fast path if we don't have a hwpipe allocated yet */ + if (!mdp5_state->hwpipe) + return -EINVAL; + + /* only allow changing of position(crtc x/y or src x/y) in fast path */ + if (plane->state->crtc != state->crtc || + plane->state->src_w != state->src_w || + plane->state->src_h != state->src_h || + plane->state->crtc_w != state->crtc_w || + plane->state->crtc_h != state->crtc_h || + !plane->state->fb || + plane->state->fb != state->fb) + return -EINVAL; + + clip.x1 = 0; + clip.y1 = 0; + clip.x2 = crtc_state->adjusted_mode.hdisplay; + clip.y2 = crtc_state->adjusted_mode.vdisplay; + min_scale = FRAC_16_16(1, 8); + max_scale = FRAC_16_16(8, 1); + + ret = drm_plane_helper_check_state(state, &clip, min_scale, + max_scale, true, true); + if (ret) + return ret; + + /* + * if the visibility of the plane changes (i.e, if the cursor is + * clipped out completely, we can't take the async path because + * we need to stage/unstage the plane from the Layer Mixer(s). We + * also assign/unassign the hwpipe(s) tied to the plane. We avoid + * taking the fast path for both these reasons. 
+ */ + if (state->visible != plane->state->visible) + return -EINVAL; + + return 0; +} + +static void mdp5_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + + if (plane_enabled(new_state)) { + struct mdp5_ctl *ctl; + struct mdp5_pipeline *pipeline = + mdp5_crtc_get_pipeline(plane->crtc); + int ret; + + ret = mdp5_plane_mode_set(plane, new_state->crtc, new_state->fb, + &new_state->src, &new_state->dst); + WARN_ON(ret < 0); + + ctl = mdp5_crtc_get_ctl(new_state->crtc); + + mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane)); + } + + *to_mdp5_plane_state(plane->state) = + *to_mdp5_plane_state(new_state); +} + static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = { .prepare_fb = mdp5_plane_prepare_fb, .cleanup_fb = mdp5_plane_cleanup_fb, .atomic_check = mdp5_plane_atomic_check, .atomic_update = mdp5_plane_atomic_update, + .atomic_async_check = mdp5_plane_atomic_async_check, + .atomic_async_update = mdp5_plane_atomic_async_update, }; static void set_scanout_locked(struct mdp5_kms *mdp5_kms, @@ -996,84 +1061,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, return ret; } -static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane, - struct drm_crtc *crtc, struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct drm_plane_state *plane_state, *new_plane_state; - struct mdp5_plane_state *mdp5_pstate; - struct drm_crtc_state *crtc_state = crtc->state; - int ret; - - if (!crtc_state->active || drm_atomic_crtc_needs_modeset(crtc_state)) - goto slow; - - plane_state = plane->state; - mdp5_pstate = to_mdp5_plane_state(plane_state); - - /* don't use fast path if we don't have a hwpipe allocated yet */ - if (!mdp5_pstate->hwpipe) - goto slow; - - /* only allow changing of position(crtc x/y or src x/y) in fast path */ - if (plane_state->crtc != crtc || - plane_state->src_w != src_w || - plane_state->src_h != src_h || - plane_state->crtc_w != crtc_w || - plane_state->crtc_h != crtc_h || - !plane_state->fb || - plane_state->fb != fb) - goto slow; - - new_plane_state = mdp5_plane_duplicate_state(plane); - if (!new_plane_state) - return -ENOMEM; - - new_plane_state->src_x = src_x; - new_plane_state->src_y = src_y; - new_plane_state->src_w = src_w; - new_plane_state->src_h = src_h; - new_plane_state->crtc_x = crtc_x; - new_plane_state->crtc_y = crtc_y; - new_plane_state->crtc_w = crtc_w; - new_plane_state->crtc_h = crtc_h; - - ret = mdp5_plane_atomic_check_with_state(crtc_state, new_plane_state); - if (ret) - goto slow_free; - - if (new_plane_state->visible) { - struct mdp5_ctl *ctl; - struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(crtc); - - ret = mdp5_plane_mode_set(plane, crtc, fb, - &new_plane_state->src, - &new_plane_state->dst); - WARN_ON(ret < 0); - - ctl = mdp5_crtc_get_ctl(crtc); - - mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane)); - } - - *to_mdp5_plane_state(plane_state) = - *to_mdp5_plane_state(new_plane_state); - - mdp5_plane_destroy_state(plane, new_plane_state); - - return 0; -slow_free: - mdp5_plane_destroy_state(plane, new_plane_state); -slow: - return drm_atomic_helper_update_plane(plane, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - src_x, src_y, src_w, src_h, 
ctx); -} - /* * Use this func and the one below only after the atomic state has been * successfully swapped @@ -1133,16 +1120,9 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, mdp5_plane->nformats = mdp_get_formats(mdp5_plane->formats, ARRAY_SIZE(mdp5_plane->formats), false); - if (type == DRM_PLANE_TYPE_CURSOR) - ret = drm_universal_plane_init(dev, plane, 0xff, - &mdp5_cursor_plane_funcs, - mdp5_plane->formats, mdp5_plane->nformats, - NULL, type, NULL); - else - ret = drm_universal_plane_init(dev, plane, 0xff, - &mdp5_plane_funcs, - mdp5_plane->formats, mdp5_plane->nformats, - NULL, type, NULL); + ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs, + mdp5_plane->formats, mdp5_plane->nformats, + NULL, type, NULL); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 025d454163b0..bf5f8c39f34d 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -146,35 +146,6 @@ static void commit_worker(struct work_struct *work) complete_commit(container_of(work, struct msm_commit, work), true); } -/* - * this func is identical to the drm_atomic_helper_check, but we keep this - * because we might eventually need to have a more finegrained check - * sequence without using the atomic helpers. - * - * In the past, we first called drm_atomic_helper_check_planes, and then - * drm_atomic_helper_check_modeset. We needed this because the MDP5 plane's - * ->atomic_check could update ->mode_changed for pixel format changes. - * This, however isn't needed now because if there is a pixel format change, - * we just assign a new hwpipe for it with a new SMP allocation. We might - * eventually hit a condition where we would need to do a full modeset if - * we run out of planes. There, we'd probably need to set mode_changed. - */ -int msm_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state) -{ - int ret; - - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); - if (ret) - return ret; - - return ret; -} - /** * drm_atomic_helper_commit - commit validated state object * @dev: DRM device @@ -202,6 +173,18 @@ int msm_atomic_commit(struct drm_device *dev, if (ret) return ret; + /* + * Note that plane->atomic_async_check() should fail if we need + * to re-assign hwpipe or anything that touches global atomic + * state, so we'll never go down the async update path in those + * cases. + */ + if (state->async_update) { + drm_atomic_helper_async_commit(dev, state); + drm_atomic_helper_cleanup_planes(dev, state); + return 0; + } + c = commit_init(state); if (!c) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 606df7bea97b..0a3ea3034e39 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -29,9 +29,12 @@ * - 1.0.0 - initial interface * - 1.1.0 - adds madvise, and support for submits with > 4 cmd buffers * - 1.2.0 - adds explicit fence support for submit ioctl + * - 1.3.0 - adds GMEM_BASE + NR_RINGS params, SUBMITQUEUE_NEW + + * SUBMITQUEUE_CLOSE ioctls, and MSM_INFO_IOVA flag for + * MSM_GEM_INFO ioctl. 
*/ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 2 +#define MSM_VERSION_MINOR 3 #define MSM_VERSION_PATCHLEVEL 0 static void msm_fb_output_poll_changed(struct drm_device *dev) @@ -44,7 +47,7 @@ static void msm_fb_output_poll_changed(struct drm_device *dev) static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = msm_framebuffer_create, .output_poll_changed = msm_fb_output_poll_changed, - .atomic_check = msm_atomic_check, + .atomic_check = drm_atomic_helper_check, .atomic_commit = msm_atomic_commit, .atomic_state_alloc = msm_atomic_state_alloc, .atomic_state_clear = msm_atomic_state_clear, @@ -211,7 +214,6 @@ static int msm_drm_uninit(struct device *dev) struct drm_device *ddev = platform_get_drvdata(pdev); struct msm_drm_private *priv = ddev->dev_private; struct msm_kms *kms = priv->kms; - struct msm_gpu *gpu = priv->gpu; struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl; struct vblank_event *vbl_ev, *tmp; @@ -253,15 +255,6 @@ static int msm_drm_uninit(struct device *dev) if (kms && kms->funcs) kms->funcs->destroy(kms); - if (gpu) { - mutex_lock(&ddev->struct_mutex); - // XXX what do we do here? - //pm_runtime_enable(&pdev->dev); - gpu->funcs->pm_suspend(gpu); - mutex_unlock(&ddev->struct_mutex); - gpu->funcs->destroy(gpu); - } - if (priv->vram.paddr) { unsigned long attrs = DMA_ATTR_NO_KERNEL_MAPPING; drm_mm_takedown(&priv->vram.mm); @@ -514,24 +507,37 @@ static void load_gpu(struct drm_device *dev) mutex_unlock(&init_lock); } -static int msm_open(struct drm_device *dev, struct drm_file *file) +static int context_init(struct drm_device *dev, struct drm_file *file) { struct msm_file_private *ctx; - /* For now, load gpu on open.. to avoid the requirement of having - * firmware in the initrd. - */ - load_gpu(dev); - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; + msm_submitqueue_init(dev, ctx); + file->driver_priv = ctx; return 0; } +static int msm_open(struct drm_device *dev, struct drm_file *file) +{ + /* For now, load gpu on open.. to avoid the requirement of having + * firmware in the initrd. 
+ */ + load_gpu(dev); + + return context_init(dev, file); +} + +static void context_close(struct msm_file_private *ctx) +{ + msm_submitqueue_close(ctx); + kfree(ctx); +} + static void msm_postclose(struct drm_device *dev, struct drm_file *file) { struct msm_drm_private *priv = dev->dev_private; @@ -542,7 +548,7 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file) priv->lastctx = NULL; mutex_unlock(&dev->struct_mutex); - kfree(ctx); + context_close(ctx); } static void msm_lastclose(struct drm_device *dev) @@ -737,16 +743,27 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, struct msm_drm_private *priv = dev->dev_private; struct drm_msm_wait_fence *args = data; ktime_t timeout = to_ktime(args->timeout); + struct msm_gpu_submitqueue *queue; + struct msm_gpu *gpu = priv->gpu; + int ret; if (args->pad) { DRM_ERROR("invalid pad: %08x\n", args->pad); return -EINVAL; } - if (!priv->gpu) + if (!gpu) return 0; - return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true); + queue = msm_submitqueue_get(file->driver_priv, args->queueid); + if (!queue) + return -ENOENT; + + ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout, + true); + + msm_submitqueue_put(queue); + return ret; } static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, @@ -787,6 +804,28 @@ unlock: return ret; } + +static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_submitqueue *args = data; + + if (args->flags & ~MSM_SUBMITQUEUE_FLAGS) + return -EINVAL; + + return msm_submitqueue_create(dev, file->driver_priv, args->prio, + args->flags, &args->id); +} + + +static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data, + struct drm_file *file) +{ + u32 id = *(u32 *) data; + + return msm_submitqueue_remove(file->driver_priv, id); +} + static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_AUTH|DRM_RENDER_ALLOW), @@ -796,6 +835,8 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT, msm_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE, msm_ioctl_wait_fence, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE, msm_ioctl_gem_madvise, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct vm_operations_struct vm_ops = { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 5e8109c07560..c646843d8822 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -56,11 +56,9 @@ struct msm_gem_address_space; struct msm_gem_vma; struct msm_file_private { - /* currently we don't do anything useful with this.. but when - * per-context address spaces are supported we'd keep track of - * the context's page-tables here. 
- */ - int dummy; + rwlock_t queuelock; + struct list_head submitqueues; + int queueid; }; enum msm_mdp_plane_property { @@ -76,6 +74,8 @@ struct msm_vblank_ctrl { spinlock_t lock; }; +#define MSM_GPU_MAX_RINGS 4 + struct msm_drm_private { struct drm_device *dev; @@ -108,7 +108,8 @@ struct msm_drm_private { struct drm_fb_helper *fbdev; - struct msm_rd_state *rd; + struct msm_rd_state *rd; /* debugfs to dump all submits */ + struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */ struct msm_perf_state *perf; /* list of GEM objects: */ @@ -154,20 +155,12 @@ struct msm_drm_private { struct shrinker shrinker; struct msm_vblank_ctrl vblank_ctrl; - - /* task holding struct_mutex.. currently only used in submit path - * to detect and reject faults from copy_from_user() for submit - * ioctl. - */ - struct task_struct *struct_mutex_task; }; struct msm_format { uint32_t pixel_format; }; -int msm_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state); int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev); @@ -219,6 +212,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); void *msm_gem_get_vaddr(struct drm_gem_object *obj); +void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); void msm_gem_put_vaddr(struct drm_gem_object *obj); int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); int msm_gem_sync_object(struct drm_gem_object *obj, @@ -303,7 +297,8 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct msm_drm_private *priv); -void msm_rd_dump_submit(struct msm_gem_submit *submit); +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, + const char *fmt, ...); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else @@ -319,6 +314,18 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); +struct msm_gpu_submitqueue; +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id); +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id); +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); +void msm_submitqueue_close(struct msm_file_private *ctx); + +void msm_submitqueue_destroy(struct kref *kref); + + #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) 
if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index a2f89bac9c16..349c12f670eb 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -31,7 +31,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) return ERR_PTR(-ENOMEM); fctx->dev = dev; - fctx->name = name; + strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); init_waitqueue_head(&fctx->event); spin_lock_init(&fctx->spinlock); diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 56061aa1959d..1aa6a4c6530c 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -22,7 +22,7 @@ struct msm_fence_context { struct drm_device *dev; - const char *name; + char name[32]; unsigned context; /* last_fence == completed_fence --> no pending work */ uint32_t last_fence; /* last assigned fence */ diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index f15821a0d900..81fe6d6740ce 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -470,14 +470,16 @@ fail: return ret; } -void *msm_gem_get_vaddr(struct drm_gem_object *obj) +static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) { struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret = 0; mutex_lock(&msm_obj->lock); - if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { + if (WARN_ON(msm_obj->madv > madv)) { + dev_err(obj->dev->dev, "Invalid madv state: %u vs %u\n", + msm_obj->madv, madv); mutex_unlock(&msm_obj->lock); return ERR_PTR(-EBUSY); } @@ -513,6 +515,22 @@ fail: return ERR_PTR(ret); } +void *msm_gem_get_vaddr(struct drm_gem_object *obj) +{ + return get_vaddr(obj, MSM_MADV_WILLNEED); +} + +/* + * Don't use this! It is for the very special case of dumping + * submits from GPU hangs or faults, were the bo may already + * be MSM_MADV_DONTNEED, but we know the buffer is still on the + * active list. + */ +void *msm_gem_get_vaddr_active(struct drm_gem_object *obj) +{ + return get_vaddr(obj, __MSM_MADV_PURGED); +} + void msm_gem_put_vaddr(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -610,17 +628,6 @@ int msm_gem_sync_object(struct drm_gem_object *obj, struct dma_fence *fence; int i, ret; - if (!exclusive) { - /* NOTE: _reserve_shared() must happen before _add_shared_fence(), - * which makes this a slightly strange place to call it. OTOH this - * is a convenient can-fail point to hook it in. (And similar to - * how etnaviv and nouveau handle this.) 
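The relaxed check works because the madvise states are ordered: msm_gem_get_vaddr() still refuses anything past WILLNEED, while the new hang-dump variant also accepts DONTNEED objects, since its callers hold the buffer on the active list and it cannot actually have been purged yet. A short sketch of the rule being relied on (WILLNEED/DONTNEED come from the msm uapi, __MSM_MADV_PURGED is driver-internal; numeric values shown for illustration only):

/* MSM_MADV_WILLNEED  (0)  pages are needed
 * MSM_MADV_DONTNEED  (1)  purgeable by the shrinker
 * __MSM_MADV_PURGED  (2)  backing pages already released
 *
 * get_vaddr(obj, madv) fails with -EBUSY when msm_obj->madv > madv.
 */
static void example_vmap_rules(struct drm_gem_object *obj)
{
	/* Normal paths: only WILLNEED objects may be vmapped. */
	void *vaddr = msm_gem_get_vaddr(obj);

	if (!IS_ERR(vaddr))
		msm_gem_put_vaddr(obj);

	/* Hang-dump path: DONTNEED is also accepted, because the caller
	 * holds the object on the active list. */
	vaddr = msm_gem_get_vaddr_active(obj);
	if (!IS_ERR(vaddr))
		msm_gem_put_vaddr(obj);
}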
- */ - ret = reservation_object_reserve_shared(msm_obj->resv); - if (ret) - return ret; - } - fobj = reservation_object_get_list(msm_obj->resv); if (!fobj || (fobj->shared_count == 0)) { fence = reservation_object_get_excl(msm_obj->resv); @@ -1045,10 +1052,10 @@ static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, } vaddr = msm_gem_get_vaddr(obj); - if (!vaddr) { + if (IS_ERR(vaddr)) { msm_gem_put_iova(obj, aspace); drm_gem_object_unreference(obj); - return ERR_PTR(-ENOMEM); + return ERR_CAST(vaddr); } if (bo) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 91c210d2359c..9320e184b48d 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -138,12 +138,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); struct msm_gem_submit { struct drm_device *dev; struct msm_gpu *gpu; - struct list_head node; /* node in gpu submit_list */ + struct list_head node; /* node in ring submit list */ struct list_head bo_list; struct ww_acquire_ctx ticket; + uint32_t seqno; /* Sequence number of the submit on the ring */ struct dma_fence *fence; + struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ + struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; struct { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5d0a75d4b249..b8dc8f96caf2 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -31,7 +31,8 @@ #define BO_PINNED 0x2000 static struct msm_gem_submit *submit_create(struct drm_device *dev, - struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds) + struct msm_gpu *gpu, struct msm_gpu_submitqueue *queue, + uint32_t nr_bos, uint32_t nr_cmds) { struct msm_gem_submit *submit; uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) + @@ -49,6 +50,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->fence = NULL; submit->pid = get_pid(task_pid(current)); submit->cmd = (void *)&submit->bos[nr_bos]; + submit->queue = queue; + submit->ring = gpu->rb[queue->prio]; /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; @@ -66,6 +69,8 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) dma_fence_put(submit->fence); list_del(&submit->node); put_pid(submit->pid); + msm_submitqueue_put(submit->queue); + kfree(submit); } @@ -156,7 +161,8 @@ out: return ret; } -static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) +static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, + int i, bool backoff) { struct msm_gem_object *msm_obj = submit->bos[i].obj; @@ -166,7 +172,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) if (submit->bos[i].flags & BO_LOCKED) ww_mutex_unlock(&msm_obj->resv->lock); - if (!(submit->bos[i].flags & BO_VALID)) + if (backoff && !(submit->bos[i].flags & BO_VALID)) submit->bos[i].iova = 0; submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED); @@ -201,10 +207,10 @@ retry: fail: for (; i >= 0; i--) - submit_unlock_unpin_bo(submit, i); + submit_unlock_unpin_bo(submit, i, true); if (slow_locked > 0) - submit_unlock_unpin_bo(submit, slow_locked); + submit_unlock_unpin_bo(submit, slow_locked, true); if (ret == -EDEADLK) { struct msm_gem_object *msm_obj = submit->bos[contended].obj; @@ -221,7 +227,7 @@ fail: return ret; } -static int submit_fence_sync(struct msm_gem_submit *submit) +static int 
submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) { int i, ret = 0; @@ -229,7 +235,22 @@ static int submit_fence_sync(struct msm_gem_submit *submit) struct msm_gem_object *msm_obj = submit->bos[i].obj; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; - ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); + if (!write) { + /* NOTE: _reserve_shared() must happen before + * _add_shared_fence(), which makes this a slightly + * strange place to call it. OTOH this is a + * convenient can-fail point to hook it in. + */ + ret = reservation_object_reserve_shared(msm_obj->resv); + if (ret) + return ret; + } + + if (no_implicit) + continue; + + ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, + write); if (ret) break; } @@ -373,7 +394,7 @@ static void submit_cleanup(struct msm_gem_submit *submit) for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; - submit_unlock_unpin_bo(submit, i); + submit_unlock_unpin_bo(submit, i, false); list_del_init(&msm_obj->submit_entry); drm_gem_object_unreference(&msm_obj->base); } @@ -391,6 +412,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_gpu *gpu = priv->gpu; struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; + struct msm_gpu_submitqueue *queue; + struct msm_ringbuffer *ring; int out_fence_fd = -1; unsigned i; int ret; @@ -407,6 +430,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS) return -EINVAL; + queue = msm_submitqueue_get(ctx, args->queueid); + if (!queue) + return -ENOENT; + + ring = gpu->rb[queue->prio]; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { in_fence = sync_file_get_fence(args->fence_fd); @@ -417,7 +446,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, * Wait if the fence is from a foreign context, or if the fence * array contains any fence from a foreign context. 
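submit_fence_sync() now always reserves a shared-fence slot for buffers that are not written, and only the implicit-sync wait itself is skipped when MSM_SUBMIT_NO_IMPLICIT is set. For explicit sync, the in-fence is only waited on when it does not already come from the target ring's fence context. A condensed sketch of that rule (not the literal ioctl code):

/* Our own ring orders its fences by construction; only block on an
 * explicit in-fence produced elsewhere (another ring, another driver). */
static int example_sync_explicit_in_fence(struct dma_fence *in_fence,
					  struct msm_ringbuffer *ring)
{
	if (!in_fence)
		return 0;

	if (dma_fence_match_context(in_fence, ring->fctx->context))
		return 0;

	return dma_fence_wait(in_fence, true);	/* interruptible */
}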
*/ - if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { + if (!dma_fence_match_context(in_fence, ring->fctx->context)) { ret = dma_fence_wait(in_fence, true); if (ret) return ret; @@ -435,9 +464,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out_unlock; } } - priv->struct_mutex_task = current; - submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds); + submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds); if (!submit) { ret = -ENOMEM; goto out_unlock; @@ -451,11 +479,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out; - if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) { - ret = submit_fence_sync(submit); - if (ret) - goto out; - } + ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT)); + if (ret) + goto out; ret = submit_pin_objects(submit); if (ret) @@ -522,7 +548,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; - submit->fence = msm_fence_alloc(gpu->fctx); + submit->fence = msm_fence_alloc(ring->fctx); if (IS_ERR(submit->fence)) { ret = PTR_ERR(submit->fence); submit->fence = NULL; @@ -555,7 +581,6 @@ out: out_unlock: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); - priv->struct_mutex_task = NULL; mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ffbff27600e0..8d4477818ec2 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -20,6 +20,8 @@ #include "msm_mmu.h" #include "msm_fence.h" +#include <linux/string_helpers.h> + /* * Power Management: @@ -221,33 +223,102 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + +static struct msm_gem_submit * +find_submit(struct msm_ringbuffer *ring, uint32_t fence) +{ + struct msm_gem_submit *submit; + + WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); + + list_for_each_entry(submit, &ring->submits, node) + if (submit->seqno == fence) + return submit; + + return NULL; +} + static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; struct msm_gem_submit *submit; - uint32_t fence = gpu->funcs->last_fence(gpu); - - msm_update_fence(gpu->fctx, fence + 1); + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + int i; mutex_lock(&dev->struct_mutex); dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->fence->seqno == (fence + 1)) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(submit->pid, PIDTYPE_PID); - if (task) { - dev_err(dev->dev, "%s: offending task: %s\n", - gpu->name, task->comm); - } - rcu_read_unlock(); - break; + + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); + if (submit) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + char *cmd; + + /* + * So slightly annoying, in other paths like + * mmap'ing gem buffers, mmap_sem is acquired + * before struct_mutex, which 
means we can't + * hold struct_mutex across the call to + * get_cmdline(). But submits are retired + * from the same in-order workqueue, so we can + * safely drop the lock here without worrying + * about the submit going away. + */ + mutex_unlock(&dev->struct_mutex); + cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); + mutex_lock(&dev->struct_mutex); + + dev_err(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, task->comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", task->comm, cmd); + } else { + msm_rd_dump_submit(priv->hangrd, submit, NULL); } + rcu_read_unlock(); + } + + + /* + * Update all the rings with the latest and greatest fence.. this + * needs to happen after msm_rd_dump_submit() to ensure that the + * bo's referenced by the offending submit are still around. + */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + uint32_t fence = ring->memptrs->fence; + + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence++; + + update_fences(gpu, ring, fence); } if (msm_gpu_active(gpu)) { @@ -258,9 +329,15 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -281,25 +358,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->funcs->last_fence(gpu); + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->fctx->last_fence) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! 
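Hangcheck and recovery are now per ring: each ring tracks the last sequence number it handed out (ring->seqno) and the last one the GPU wrote back (ring->memptrs->fence), and only the faulting ring has its completed fence advanced by one so the hung submit is retired instead of replayed. A compressed sketch of that bookkeeping, mirroring the msm_gpu_active() helper later in this patch:

/* Work is still in flight while the GPU's fence writeback lags the last
 * assigned seqno on any ring. */
static bool example_ring_busy(struct msm_ringbuffer *ring)
{
	return ring->seqno > ring->memptrs->fence;
}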
*/ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->fctx->last_fence); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->fctx->last_fence > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -428,19 +507,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; + /* Retire the commits starting with highest priority */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); - - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -449,9 +527,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->funcs->last_fence(gpu); + int i; - msm_update_fence(gpu->fctx, fence); + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -472,6 +551,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -480,9 +560,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gpu_hw_init(gpu); - list_add_tail(&submit->node, &gpu->submit_list); + submit->seqno = ++ring->seqno; - msm_rd_dump_submit(submit); + list_add_tail(&submit->node, &ring->submits); + + msm_rd_dump_submit(priv->rd, submit, NULL); update_sw_cntrs(gpu); @@ -605,7 +687,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -613,18 +697,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; - gpu->fctx = msm_fence_context_alloc(drm, name); - if (IS_ERR(gpu->fctx)) { - ret = PTR_ERR(gpu->fctx); - gpu->fctx = NULL; - goto fail; - } INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -689,36 +766,79 @@ int msm_gpu_init(struct drm_device *drm, struct 
platform_device *pdev, goto fail; } - /* Create ringbuffer: */ - gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, + &memptrs_iova); + + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); + dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); + } + + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); + } + + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + platform_set_drvdata(pdev, NULL); return ret; } void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; } - if (gpu->aspace) { + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + + if (!IS_ERR_OR_NULL(gpu->aspace)) { gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, NULL, 0); msm_gem_address_space_put(gpu->aspace); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index df4e2771fb85..e113d64574d3 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -33,7 +33,7 @@ struct msm_gpu_config { const char *irqname; uint64_t va_start; uint64_t va_end; - unsigned int ringsz; + unsigned int nr_rings; }; /* So far, with hardware that I've seen to date, we can have: @@ -57,9 +57,9 @@ struct msm_gpu_funcs { int (*pm_resume)(struct msm_gpu *gpu); void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); - void (*flush)(struct msm_gpu *gpu); + void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); irqreturn_t (*irq)(struct msm_gpu *irq); - uint32_t (*last_fence)(struct msm_gpu *gpu); + struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS @@ -86,16 +86,12 @@ struct msm_gpu { const struct msm_gpu_perfcntr *perfcntrs; uint32_t num_perfcntrs; - /* ringbuffer: */ - struct msm_ringbuffer *rb; - uint64_t rb_iova; + struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; + int nr_rings; /* list of GEM active objects: */ struct list_head active_list; - /* fencing: */ - struct msm_fence_context *fctx; - /* does gpu need hw_init? 
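With per-ring fence contexts the GPU-wide ->last_fence() hook goes away; backends instead flush a specific ring and report which ring the hardware is currently executing via ->active_ring(). A sketch of what a backend without preemption would provide for the new hook (illustrative; an a5xx-style backend with preemption would pick the ring from its preemption state):

static struct msm_ringbuffer *example_active_ring(struct msm_gpu *gpu)
{
	/* Single-ring / non-preempting GPUs only ever execute ring 0. */
	return gpu->rb[0];
}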
*/ bool needs_hw_init; @@ -126,15 +122,31 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; - uint32_t hangcheck_fence; struct work_struct recover_work; - struct list_head submit_list; + struct drm_gem_object *memptrs_bo; }; +/* It turns out that all targets use the same ringbuffer size */ +#define MSM_GPU_RINGBUFFER_SZ SZ_32K +#define MSM_GPU_RINGBUFFER_BLKSIZE 32 + +#define MSM_GPU_RB_CNTL_DEFAULT \ + (AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \ + AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8))) + static inline bool msm_gpu_active(struct msm_gpu *gpu) { - return gpu->fctx->last_fence > gpu->funcs->last_fence(gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (ring->seqno > ring->memptrs->fence) + return true; + } + + return false; } /* Perf-Counters: @@ -150,6 +162,15 @@ struct msm_gpu_perfcntr { const char *name; }; +struct msm_gpu_submitqueue { + int id; + u32 flags; + u32 prio; + int faults; + struct list_head node; + struct kref ref; +}; + static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { msm_writel(data, gpu->mmio + (reg << 2)); @@ -223,4 +244,10 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev); void __init adreno_register(void); void __exit adreno_unregister(void); +static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue) +{ + if (queue) + kref_put(&queue->ref, msm_submitqueue_destroy); +} + #endif /* __MSM_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 0366b8092f97..3aa8a8576abe 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -19,11 +19,17 @@ * * tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd * - * To log the cmdstream in a format that is understood by freedreno/cffdump + * to log the cmdstream in a format that is understood by freedreno/cffdump * utility. By comparing the last successfully completed fence #, to the * cmdstream for the next fence, you can narrow down which process and submit * caused the gpu crash/lockup. * + * Additionally: + * + * tail -f /sys/kernel/debug/dri/<minor>/hangrd > logfile.rd + * + * will capture just the cmdstream from submits which triggered a GPU hang. + * * This bypasses drm_debugfs_create_files() mainly because we need to use * our own fops for a bit more control. In particular, we don't want to * do anything if userspace doesn't have the debugfs file open. @@ -111,10 +117,14 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + /* Note that smp_load_acquire() is not strictly required + * as CIRC_SPACE_TO_END() does not access the tail more + * than once. + */ n = min(sz, circ_space_to_end(&rd->fifo)); memcpy(fptr, ptr, n); - fifo->head = (fifo->head + n) & (BUF_SZ - 1); + smp_store_release(&fifo->head, (fifo->head + n) & (BUF_SZ - 1)); sz -= n; ptr += n; @@ -145,13 +155,17 @@ static ssize_t rd_read(struct file *file, char __user *buf, if (ret) goto out; + /* Note that smp_load_acquire() is not strictly required + * as CIRC_CNT_TO_END() does not access the head more than + * once. 
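The head/tail updates in rd_write() and rd_read() now follow the documented circular-buffer pattern (Documentation/circular-buffers.txt): the side that produces data publishes it with a release store on its index, and the matching acquire load can be elided here because each index is only read once per loop iteration. Paraphrasing the rd_write()/rd_read() hunks in this file:

/* producer (rd_write): copy the bytes first, then publish the new head */
memcpy(fptr, ptr, n);
smp_store_release(&fifo->head, (fifo->head + n) & (BUF_SZ - 1));

/* consumer (rd_read): copy out first, then release the consumed space */
if (!copy_to_user(buf, fptr, n))
	smp_store_release(&fifo->tail, (fifo->tail + n) & (BUF_SZ - 1));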
+ */ n = min_t(int, sz, circ_count_to_end(&rd->fifo)); if (copy_to_user(buf, fptr, n)) { ret = -EFAULT; goto out; } - fifo->tail = (fifo->tail + n) & (BUF_SZ - 1); + smp_store_release(&fifo->tail, (fifo->tail + n) & (BUF_SZ - 1)); *ppos += n; wake_up_all(&rd->fifo_event); @@ -212,53 +226,89 @@ static const struct file_operations rd_debugfs_fops = { .release = rd_release, }; -int msm_rd_debugfs_init(struct drm_minor *minor) + +static void rd_cleanup(struct msm_rd_state *rd) +{ + if (!rd) + return; + + mutex_destroy(&rd->read_lock); + kfree(rd); +} + +static struct msm_rd_state *rd_init(struct drm_minor *minor, const char *name) { - struct msm_drm_private *priv = minor->dev->dev_private; struct msm_rd_state *rd; struct dentry *ent; - - /* only create on first minor: */ - if (priv->rd) - return 0; + int ret = 0; rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rd->dev = minor->dev; rd->fifo.buf = rd->buf; mutex_init(&rd->read_lock); - priv->rd = rd; init_waitqueue_head(&rd->fifo_event); - ent = debugfs_create_file("rd", S_IFREG | S_IRUGO, + ent = debugfs_create_file(name, S_IFREG | S_IRUGO, minor->debugfs_root, rd, &rd_debugfs_fops); if (!ent) { - DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/rd\n", - minor->debugfs_root); + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n", + minor->debugfs_root, name); + ret = -ENOMEM; + goto fail; + } + + return rd; + +fail: + rd_cleanup(rd); + return ERR_PTR(ret); +} + +int msm_rd_debugfs_init(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_rd_state *rd; + int ret; + + /* only create on first minor: */ + if (priv->rd) + return 0; + + rd = rd_init(minor, "rd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); goto fail; } + priv->rd = rd; + + rd = rd_init(minor, "hangrd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); + goto fail; + } + + priv->hangrd = rd; + return 0; fail: msm_rd_debugfs_cleanup(priv); - return -1; + return ret; } void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) { - struct msm_rd_state *rd = priv->rd; - - if (!rd) - return; - + rd_cleanup(priv->rd); priv->rd = NULL; - mutex_destroy(&rd->read_lock); - kfree(rd); + + rd_cleanup(priv->hangrd); + priv->hangrd = NULL; } static void snapshot_buf(struct msm_rd_state *rd, @@ -268,10 +318,6 @@ static void snapshot_buf(struct msm_rd_state *rd, struct msm_gem_object *obj = submit->bos[idx].obj; const char *buf; - buf = msm_gem_get_vaddr(&obj->base); - if (IS_ERR(buf)) - return; - if (iova) { buf += iova - submit->bos[idx].iova; } else { @@ -279,20 +325,33 @@ static void snapshot_buf(struct msm_rd_state *rd, size = obj->base.size; } + /* + * Always write the GPUADDR header so can get a complete list of all the + * buffers in the cmd + */ rd_write_section(rd, RD_GPUADDR, (uint32_t[3]){ iova, size, iova >> 32 }, 12); + + /* But only dump the contents of buffers marked READ */ + if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ)) + return; + + buf = msm_gem_get_vaddr_active(&obj->base); + if (IS_ERR(buf)) + return; + rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size); msm_gem_put_vaddr(&obj->base); } /* called under struct_mutex */ -void msm_rd_dump_submit(struct msm_gem_submit *submit) +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, + const char *fmt, ...) 
{ struct drm_device *dev = submit->dev; - struct msm_drm_private *priv = dev->dev_private; - struct msm_rd_state *rd = priv->rd; - char msg[128]; + struct task_struct *task; + char msg[256]; int i, n; if (!rd->open) @@ -303,23 +362,32 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) */ WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - submit->fence->seqno); + if (fmt) { + va_list args; - rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + va_start(args, fmt); + n = vsnprintf(msg, sizeof(msg), fmt, args); + va_end(args); - if (rd_full) { - for (i = 0; i < submit->nr_bos; i++) { - /* buffers that are written to probably don't start out - * with anything interesting: - */ - if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) - continue; + rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + } - snapshot_buf(rd, submit, i, 0, 0); - } + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", + TASK_COMM_LEN, task->comm, + pid_nr(submit->pid), submit->seqno); + } else { + n = snprintf(msg, sizeof(msg), "???/%d: fence=%u", + pid_nr(submit->pid), submit->seqno); } + rcu_read_unlock(); + + rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + + for (i = 0; rd_full && i < submit->nr_bos; i++) + snapshot_buf(rd, submit, i, 0, 0); for (i = 0; i < submit->nr_cmds; i++) { uint64_t iova = submit->cmd[i].iova; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index bf065a540130..6ca98da35f63 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -18,13 +18,15 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova) { struct msm_ringbuffer *ring; + char name[32]; int ret; - if (WARN_ON(!is_power_of_2(size))) - return ERR_PTR(-EINVAL); + /* We assume everwhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */ + BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { @@ -33,32 +35,46 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->gpu = gpu; - + ring->id = id; /* Pass NULL for the iova pointer - we will map it later */ - ring->start = msm_gem_kernel_new(gpu->dev, size, MSM_BO_WC, - gpu->aspace, &ring->bo, NULL); + ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, + MSM_BO_WC, gpu->aspace, &ring->bo, NULL); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); ring->start = 0; goto fail; } - ring->end = ring->start + (size / 4); + ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); + ring->next = ring->start; ring->cur = ring->start; - ring->size = size; + ring->memptrs = memptrs; + ring->memptrs_iova = memptrs_iova; + + INIT_LIST_HEAD(&ring->submits); + spin_lock_init(&ring->lock); + + snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); + + ring->fctx = msm_fence_context_alloc(gpu->dev, name); return ring; fail: - if (ring) - msm_ringbuffer_destroy(ring); + msm_ringbuffer_destroy(ring); return ERR_PTR(ret); } void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) { + if (IS_ERR_OR_NULL(ring)) + return; + + msm_fence_context_free(ring->fctx); + if (ring->bo) { + msm_gem_put_iova(ring->bo, ring->gpu->aspace); msm_gem_put_vaddr(ring->bo); 
drm_gem_object_unreference_unlocked(ring->bo); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 6e0e1049fa4f..cffce094aecb 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -20,14 +20,31 @@ #include "msm_drv.h" +#define rbmemptr(ring, member) \ + ((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) + +struct msm_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t fence; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; - int size; + int id; struct drm_gem_object *bo; - uint32_t *start, *end, *cur; + uint32_t *start, *end, *cur, *next; + struct list_head submits; + uint64_t iova; + uint32_t seqno; + uint32_t hangcheck_fence; + struct msm_rbmemptrs *memptrs; + uint64_t memptrs_iova; + struct msm_fence_context *fctx; + spinlock_t lock; }; -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ @@ -35,9 +52,13 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); static inline void OUT_RING(struct msm_ringbuffer *ring, uint32_t data) { - if (ring->cur == ring->end) - ring->cur = ring->start; - *(ring->cur++) = data; + /* + * ring->next points to the current command being written - it won't be + * committed as ring->cur until the flush + */ + if (ring->next == ring->end) + ring->next = ring->start; + *(ring->next++) = data; } #endif /* __MSM_RINGBUFFER_H__ */ diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c new file mode 100644 index 000000000000..5115f75b5b7f --- /dev/null +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -0,0 +1,152 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
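OUT_RING() now stages command words through ring->next; nothing becomes visible to the hardware until the backend's flush hook commits next into cur and advances the write pointer, and rbmemptr(ring, fence) gives the per-ring iova a backend can program as its fence writeback target. A minimal sketch of the flush side (register name and wptr arithmetic are illustrative, not the adreno code):

#define EXAMPLE_REG_CP_RB_WPTR	0x0000		/* hypothetical register offset */

static void example_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	uint32_t wptr;

	ring->cur = ring->next;			/* commit the staged words */

	/* dword offset of the new write pointer within the ring */
	wptr = (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);

	gpu_write(gpu, EXAMPLE_REG_CP_RB_WPTR, wptr);
}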
+ * + */ + +#include <linux/kref.h> +#include "msm_gpu.h" + +void msm_submitqueue_destroy(struct kref *kref) +{ + struct msm_gpu_submitqueue *queue = container_of(kref, + struct msm_gpu_submitqueue, ref); + + kfree(queue); +} + +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return NULL; + + read_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + kref_get(&entry->ref); + read_unlock(&ctx->queuelock); + + return entry; + } + } + + read_unlock(&ctx->queuelock); + return NULL; +} + +void msm_submitqueue_close(struct msm_file_private *ctx) +{ + struct msm_gpu_submitqueue *entry, *tmp; + + if (!ctx) + return; + + /* + * No lock needed in close and there won't + * be any more user ioctls coming our way + */ + list_for_each_entry_safe(entry, tmp, &ctx->submitqueues, node) + msm_submitqueue_put(entry); +} + +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id) +{ + struct msm_drm_private *priv = drm->dev_private; + struct msm_gpu_submitqueue *queue; + + if (!ctx) + return -ENODEV; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + + if (!queue) + return -ENOMEM; + + kref_init(&queue->ref); + queue->flags = flags; + + if (priv->gpu) { + if (prio >= priv->gpu->nr_rings) + return -EINVAL; + + queue->prio = prio; + } + + write_lock(&ctx->queuelock); + + queue->id = ctx->queueid++; + + if (id) + *id = queue->id; + + list_add_tail(&queue->node, &ctx->submitqueues); + + write_unlock(&ctx->queuelock); + + return 0; +} + +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) +{ + struct msm_drm_private *priv = drm->dev_private; + int default_prio; + + if (!ctx) + return 0; + + /* + * Select priority 2 as the "default priority" unless nr_rings is less + * than 2 and then pick the lowest pirority + */ + default_prio = priv->gpu ? 
+ clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0; + + INIT_LIST_HEAD(&ctx->submitqueues); + + rwlock_init(&ctx->queuelock); + + return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL); +} + +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return 0; + + /* + * id 0 is the "default" queue and can't be destroyed + * by the user + */ + if (!id) + return -ENOENT; + + write_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + list_del(&entry->node); + write_unlock(&ctx->queuelock); + + msm_submitqueue_put(entry); + return 0; + } + } + + write_unlock(&ctx->queuelock); + return -ENOENT; +} + diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index 2e9ce53ae3a8..9c0c650655e9 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -30,9 +30,11 @@ nouveau-y += nouveau_vga.o # DRM - memory management nouveau-y += nouveau_bo.o nouveau-y += nouveau_gem.o +nouveau-y += nouveau_mem.o nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o nouveau-y += nouveau_ttm.o +nouveau-y += nouveau_vmm.o # DRM - modesetting nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index c02a13406a81..4b75ad40dd80 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -56,6 +56,13 @@ config NOUVEAU_DEBUG_DEFAULT help Selects the default debug level +config NOUVEAU_DEBUG_MMU + bool "Enable additional MMU debugging" + depends on DRM_NOUVEAU + default n + help + Say Y here if you want to enable verbose MMU debug output. + config DRM_NOUVEAU_BACKLIGHT bool "Support for backlight control" depends on DRM_NOUVEAU diff --git a/drivers/gpu/drm/nouveau/dispnv04/cursor.c b/drivers/gpu/drm/nouveau/dispnv04/cursor.c index f26e44ea7389..ebf860bd59af 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/cursor.c +++ b/drivers/gpu/drm/nouveau/dispnv04/cursor.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <drm/drmP.h> #include <drm/drm_mode.h> #include "nouveau_drv.h" diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 5b9d549aa791..501d2d290e9c 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -48,7 +48,7 @@ nv04_display_create(struct drm_device *dev) if (!disp) return -ENOMEM; - nvif_object_map(&drm->client.device.object); + nvif_object_map(&drm->client.device.object, NULL, 0); nouveau_display(dev)->priv = disp; nouveau_display(dev)->dtor = nv04_display_destroy; diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h index 74a8795c2c2b..f74f1f2b186e 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV04_DISPLAY_H__ #define __NV04_DISPLAY_H__ #include <subdev/bios.h> diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0002.h b/drivers/gpu/drm/nouveau/include/nvif/cl0002.h index 6d72ed38da32..1a8b45b4631f 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl0002.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0002.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL0002_H__ #define __NVIF_CL0002_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0046.h b/drivers/gpu/drm/nouveau/include/nvif/cl0046.h index a6a71f4ad91e..c0d5eba4f8fc 100644 --- 
a/drivers/gpu/drm/nouveau/include/nvif/cl0046.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0046.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL0046_H__ #define __NVIF_CL0046_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl006b.h b/drivers/gpu/drm/nouveau/include/nvif/cl006b.h index 309ab8a3d9e8..d0e8f35d9e92 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl006b.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl006b.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL006B_H__ #define __NVIF_CL006B_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h index 287a7d6fa480..2740278d226b 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL0080_H__ #define __NVIF_CL0080_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl506e.h b/drivers/gpu/drm/nouveau/include/nvif/cl506e.h index aa94b8cf9679..989690fe3cd8 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl506e.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl506e.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL506E_H__ #define __NVIF_CL506E_H__ @@ -5,7 +6,7 @@ struct nv50_channel_dma_v0 { __u8 version; __u8 chid; __u8 pad02[6]; - __u64 vm; + __u64 vmm; __u64 pushbuf; __u64 offset; }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl506f.h b/drivers/gpu/drm/nouveau/include/nvif/cl506f.h index 3b7101966de4..5137b6879abd 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl506f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl506f.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL506F_H__ #define __NVIF_CL506F_H__ @@ -8,6 +9,6 @@ struct nv50_channel_gpfifo_v0 { __u32 ilength; __u64 ioffset; __u64 pushbuf; - __u64 vm; + __u64 vmm; }; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h b/drivers/gpu/drm/nouveau/include/nvif/cl5070.h index 542d95145a67..7cdf53615d7b 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl5070.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL5070_H__ #define __NVIF_CL5070_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl507a.h b/drivers/gpu/drm/nouveau/include/nvif/cl507a.h index 12e0643b78bd..36e537218596 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl507a.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl507a.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL507A_H__ #define __NVIF_CL507A_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl507b.h b/drivers/gpu/drm/nouveau/include/nvif/cl507b.h index 99e9d8c47f60..3e643b752bfc 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl507b.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl507b.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL507B_H__ #define __NVIF_CL507B_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl507c.h b/drivers/gpu/drm/nouveau/include/nvif/cl507c.h index 6af70dbdfd9f..fd9e336d0a24 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl507c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl507c.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL507C_H__ #define __NVIF_CL507C_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl507d.h b/drivers/gpu/drm/nouveau/include/nvif/cl507d.h index 5ab0c9e4c6a3..e994c6894e3e 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl507d.h +++ 
b/drivers/gpu/drm/nouveau/include/nvif/cl507d.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL507D_H__ #define __NVIF_CL507D_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl507e.h b/drivers/gpu/drm/nouveau/include/nvif/cl507e.h index c06209f3cac4..8082d2fde248 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl507e.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl507e.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL507E_H__ #define __NVIF_CL507E_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h index 91e33db21a2f..1a875090b251 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL826E_H__ #define __NVIF_CL826E_H__ @@ -5,7 +6,7 @@ struct g82_channel_dma_v0 { __u8 version; __u8 chid; __u8 pad02[6]; - __u64 vm; + __u64 vmm; __u64 pushbuf; __u64 offset; }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h index e34efd4ec537..e4e50cfe88f1 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL826F_H__ #define __NVIF_CL826F_H__ @@ -8,7 +9,7 @@ struct g82_channel_gpfifo_v0 { __u32 ilength; __u64 ioffset; __u64 pushbuf; - __u64 vm; + __u64 vmm; }; #define NV826F_V0_NTFY_NON_STALL_INTERRUPT 0x00 diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h index a2d5410a491b..ab0fa8adb756 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL906F_H__ #define __NVIF_CL906F_H__ @@ -7,7 +8,7 @@ struct fermi_channel_gpfifo_v0 { __u8 pad02[2]; __u32 ilength; __u64 ioffset; - __u64 vm; + __u64 vmm; }; #define NV906F_V0_NTFY_NON_STALL_INTERRUPT 0x00 diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl9097.h b/drivers/gpu/drm/nouveau/include/nvif/cl9097.h index 4057676d2981..e4c8de6d00b7 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl9097.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl9097.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CL9097_H__ #define __NVIF_CL9097_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h index 2efa3d048bb9..56f5bd81e480 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CLA06F_H__ #define __NVIF_CLA06F_H__ @@ -22,7 +23,7 @@ struct kepler_channel_gpfifo_a_v0 { __u32 engines; __u32 ilength; __u64 ioffset; - __u64 vm; + __u64 vmm; }; #define NVA06F_V0_NTFY_NON_STALL_INTERRUPT 0x00 diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index d08da82ba7ed..a7c5bf572788 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CLASS_H__ #define __NVIF_CLASS_H__ @@ -14,6 +15,23 @@ #define NVIF_CLASS_SW_NV50 /* if0005.h */ -0x00000006 #define NVIF_CLASS_SW_GF100 /* if0005.h */ -0x00000007 +#define NVIF_CLASS_MMU /* if0008.h */ 0x80000008 +#define NVIF_CLASS_MMU_NV04 /* if0008.h */ 0x80000009 
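The new MMU/MEM/VMM classes expose the memory-type and kind tables to the rest of the driver through an nvif_mmu object; the nvif/mmu.h helpers added further down in this patch provide lookups over those tables. A small usage sketch (not part of the patch) of picking a type index with nvif_mmu_type():

/* Find a host-visible, CPU-coherent, mappable memory type, if the MMU
 * class exposes one. Returns an index into mmu->type[] or -EINVAL. */
static int example_pick_host_type(struct nvif_mmu *mmu)
{
	return nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
				  NVIF_MEM_COHERENT);
}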
+#define NVIF_CLASS_MMU_NV50 /* if0008.h */ 0x80005009 +#define NVIF_CLASS_MMU_GF100 /* if0008.h */ 0x80009009 + +#define NVIF_CLASS_MEM /* if000a.h */ 0x8000000a +#define NVIF_CLASS_MEM_NV04 /* if000b.h */ 0x8000000b +#define NVIF_CLASS_MEM_NV50 /* if500b.h */ 0x8000500b +#define NVIF_CLASS_MEM_GF100 /* if900b.h */ 0x8000900b + +#define NVIF_CLASS_VMM /* if000c.h */ 0x8000000c +#define NVIF_CLASS_VMM_NV04 /* if000d.h */ 0x8000000d +#define NVIF_CLASS_VMM_NV50 /* if500d.h */ 0x8000500d +#define NVIF_CLASS_VMM_GF100 /* if900d.h */ 0x8000900d +#define NVIF_CLASS_VMM_GM200 /* ifb00d.h */ 0x8000b00d +#define NVIF_CLASS_VMM_GP100 /* ifc00d.h */ 0x8000c00d + /* the below match nvidia-assigned (either in hw, or sw) class numbers */ #define NV_NULL_CLASS 0x00000030 diff --git a/drivers/gpu/drm/nouveau/include/nvif/client.h b/drivers/gpu/drm/nouveau/include/nvif/client.h index b52a8eadce01..f5df8b30c599 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/client.h +++ b/drivers/gpu/drm/nouveau/include/nvif/client.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_CLIENT_H__ #define __NVIF_CLIENT_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/device.h b/drivers/gpu/drm/nouveau/include/nvif/device.h index bcb981711617..6edb6266857e 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/device.h +++ b/drivers/gpu/drm/nouveau/include/nvif/device.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_DEVICE_H__ #define __NVIF_DEVICE_H__ @@ -38,7 +39,6 @@ u64 nvif_device_time(struct nvif_device *); /*XXX*/ #include <subdev/bios.h> #include <subdev/fb.h> -#include <subdev/mmu.h> #include <subdev/bar.h> #include <subdev/gpio.h> #include <subdev/clk.h> @@ -57,8 +57,6 @@ u64 nvif_device_time(struct nvif_device *); }) #define nvxx_bios(a) nvxx_device(a)->bios #define nvxx_fb(a) nvxx_device(a)->fb -#define nvxx_mmu(a) nvxx_device(a)->mmu -#define nvxx_bar(a) nvxx_device(a)->bar #define nvxx_gpio(a) nvxx_device(a)->gpio #define nvxx_clk(a) nvxx_device(a)->clk #define nvxx_i2c(a) nvxx_device(a)->i2c @@ -66,10 +64,8 @@ u64 nvif_device_time(struct nvif_device *); #define nvxx_therm(a) nvxx_device(a)->therm #define nvxx_volt(a) nvxx_device(a)->volt -#include <core/device.h> #include <engine/fifo.h> #include <engine/gr.h> -#include <engine/sw.h> #define nvxx_fifo(a) nvxx_device(a)->fifo #define nvxx_gr(a) nvxx_device(a)->gr diff --git a/drivers/gpu/drm/nouveau/include/nvif/driver.h b/drivers/gpu/drm/nouveau/include/nvif/driver.h index 0c6f48d8140a..93bccd45a042 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/driver.h +++ b/drivers/gpu/drm/nouveau/include/nvif/driver.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_DRIVER_H__ #define __NVIF_DRIVER_H__ #include <nvif/os.h> diff --git a/drivers/gpu/drm/nouveau/include/nvif/event.h b/drivers/gpu/drm/nouveau/include/nvif/event.h index 21764499b4be..ec5c924f576a 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/event.h +++ b/drivers/gpu/drm/nouveau/include/nvif/event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_EVENT_H__ #define __NVIF_EVENT_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0000.h b/drivers/gpu/drm/nouveau/include/nvif/if0000.h index c2c0fc41e017..30ecd31db5df 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if0000.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if0000.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_IF0000_H__ #define __NVIF_IF0000_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0001.h 
b/drivers/gpu/drm/nouveau/include/nvif/if0001.h index bd5b64125eed..ca9215262215 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if0001.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if0001.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_IF0001_H__ #define __NVIF_IF0001_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0002.h b/drivers/gpu/drm/nouveau/include/nvif/if0002.h index c04c91d0b818..d9235c011196 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if0002.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if0002.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_IF0002_H__ #define __NVIF_IF0002_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0003.h b/drivers/gpu/drm/nouveau/include/nvif/if0003.h index 0cd03efb80a1..ae30b8261b88 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if0003.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if0003.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_IF0003_H__ #define __NVIF_IF0003_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0004.h b/drivers/gpu/drm/nouveau/include/nvif/if0004.h index bd5cd428cfd7..b35547c8ea36 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if0004.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if0004.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_IF0004_H__ #define __NVIF_IF0004_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0005.h b/drivers/gpu/drm/nouveau/include/nvif/if0005.h index abfd373bb68b..8ed0ae101715 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if0005.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if0005.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_IF0005_H__ #define __NVIF_IF0005_H__ #define NV10_NVSW_NTFY_UEVENT 0x00 diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0008.h b/drivers/gpu/drm/nouveau/include/nvif/if0008.h new file mode 100644 index 000000000000..8450127420f5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if0008.h @@ -0,0 +1,42 @@ +#ifndef __NVIF_IF0008_H__ +#define __NVIF_IF0008_H__ +struct nvif_mmu_v0 { + __u8 version; + __u8 dmabits; + __u8 heap_nr; + __u8 type_nr; + __u16 kind_nr; +}; + +#define NVIF_MMU_V0_HEAP 0x00 +#define NVIF_MMU_V0_TYPE 0x01 +#define NVIF_MMU_V0_KIND 0x02 + +struct nvif_mmu_heap_v0 { + __u8 version; + __u8 index; + __u8 pad02[6]; + __u64 size; +}; + +struct nvif_mmu_type_v0 { + __u8 version; + __u8 index; + __u8 heap; + __u8 vram; + __u8 host; + __u8 comp; + __u8 disp; + __u8 kind; + __u8 mappable; + __u8 coherent; + __u8 uncached; +}; + +struct nvif_mmu_kind_v0 { + __u8 version; + __u8 pad01[1]; + __u16 count; + __u8 data[]; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000a.h b/drivers/gpu/drm/nouveau/include/nvif/if000a.h new file mode 100644 index 000000000000..88d0938fbd5a --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000a.h @@ -0,0 +1,22 @@ +#ifndef __NVIF_IF000A_H__ +#define __NVIF_IF000A_H__ +struct nvif_mem_v0 { + __u8 version; + __u8 type; + __u8 page; + __u8 pad03[5]; + __u64 size; + __u64 addr; + __u8 data[]; +}; + +struct nvif_mem_ram_vn { +}; + +struct nvif_mem_ram_v0 { + __u8 version; + __u8 pad01[7]; + dma_addr_t *dma; + struct scatterlist *sgl; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000b.h b/drivers/gpu/drm/nouveau/include/nvif/if000b.h new file mode 100644 index 000000000000..c677fb0293da --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000b.h @@ -0,0 +1,11 @@ +#ifndef __NVIF_IF000B_H__ +#define __NVIF_IF000B_H__ +#include 
"if000a.h" + +struct nv04_mem_vn { + /* nvkm_mem_vX ... */ +}; + +struct nv04_mem_map_vn { +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h new file mode 100644 index 000000000000..2928ecd989ad --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -0,0 +1,64 @@ +#ifndef __NVIF_IF000C_H__ +#define __NVIF_IF000C_H__ +struct nvif_vmm_v0 { + __u8 version; + __u8 page_nr; + __u8 pad02[6]; + __u64 addr; + __u64 size; + __u8 data[]; +}; + +#define NVIF_VMM_V0_PAGE 0x00 +#define NVIF_VMM_V0_GET 0x01 +#define NVIF_VMM_V0_PUT 0x02 +#define NVIF_VMM_V0_MAP 0x03 +#define NVIF_VMM_V0_UNMAP 0x04 + +struct nvif_vmm_page_v0 { + __u8 version; + __u8 index; + __u8 shift; + __u8 sparse; + __u8 vram; + __u8 host; + __u8 comp; + __u8 pad07[1]; +}; + +struct nvif_vmm_get_v0 { + __u8 version; +#define NVIF_VMM_GET_V0_ADDR 0x00 +#define NVIF_VMM_GET_V0_PTES 0x01 +#define NVIF_VMM_GET_V0_LAZY 0x02 + __u8 type; + __u8 sparse; + __u8 page; + __u8 align; + __u8 pad05[3]; + __u64 size; + __u64 addr; +}; + +struct nvif_vmm_put_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; +}; + +struct nvif_vmm_map_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; + __u64 size; + __u64 memory; + __u64 offset; + __u8 data[]; +}; + +struct nvif_vmm_unmap_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000d.h b/drivers/gpu/drm/nouveau/include/nvif/if000d.h new file mode 100644 index 000000000000..516ec9401401 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if000d.h @@ -0,0 +1,12 @@ +#ifndef __NVIF_IF000D_H__ +#define __NVIF_IF000D_H__ +#include "if000c.h" + +struct nv04_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct nv04_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if500b.h b/drivers/gpu/drm/nouveau/include/nvif/if500b.h new file mode 100644 index 000000000000..c7c8431fb2ce --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if500b.h @@ -0,0 +1,25 @@ +#ifndef __NVIF_IF500B_H__ +#define __NVIF_IF500B_H__ +#include "if000a.h" + +struct nv50_mem_vn { + /* nvif_mem_vX ... */ +}; + +struct nv50_mem_v0 { + /* nvif_mem_vX ... */ + __u8 version; + __u8 bankswz; + __u8 contig; +}; + +struct nv50_mem_map_vn { +}; + +struct nv50_mem_map_v0 { + __u8 version; + __u8 ro; + __u8 kind; + __u8 comp; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if500d.h b/drivers/gpu/drm/nouveau/include/nvif/if500d.h new file mode 100644 index 000000000000..c29a7822b363 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if500d.h @@ -0,0 +1,21 @@ +#ifndef __NVIF_IF500D_H__ +#define __NVIF_IF500D_H__ +#include "if000c.h" + +struct nv50_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct nv50_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct nv50_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 ro; + __u8 priv; + __u8 kind; + __u8 comp; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if900b.h b/drivers/gpu/drm/nouveau/include/nvif/if900b.h new file mode 100644 index 000000000000..9b164548eea8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if900b.h @@ -0,0 +1,23 @@ +#ifndef __NVIF_IF900B_H__ +#define __NVIF_IF900B_H__ +#include "if000a.h" + +struct gf100_mem_vn { + /* nvif_mem_vX ... */ +}; + +struct gf100_mem_v0 { + /* nvif_mem_vX ... 
*/ + __u8 version; + __u8 contig; +}; + +struct gf100_mem_map_vn { +}; + +struct gf100_mem_map_v0 { + __u8 version; + __u8 ro; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if900d.h b/drivers/gpu/drm/nouveau/include/nvif/if900d.h new file mode 100644 index 000000000000..49aa50583c3d --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/if900d.h @@ -0,0 +1,21 @@ +#ifndef __NVIF_IF900D_H__ +#define __NVIF_IF900D_H__ +#include "if000c.h" + +struct gf100_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct gf100_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct gf100_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 vol; + __u8 ro; + __u8 priv; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifb00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifb00d.h new file mode 100644 index 000000000000..a0e419830595 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/ifb00d.h @@ -0,0 +1,27 @@ +#ifndef __NVIF_IFB00D_H__ +#define __NVIF_IFB00D_H__ +#include "if000c.h" + +struct gm200_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct gm200_vmm_v0 { + /* nvif_vmm_vX ... */ + __u8 version; + __u8 bigpage; +}; + +struct gm200_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct gm200_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 vol; + __u8 ro; + __u8 priv; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h new file mode 100644 index 000000000000..1d9c637859f3 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h @@ -0,0 +1,21 @@ +#ifndef __NVIF_IFC00D_H__ +#define __NVIF_IFC00D_H__ +#include "if000c.h" + +struct gp100_vmm_vn { + /* nvif_vmm_vX ... */ +}; + +struct gp100_vmm_map_vn { + /* nvif_vmm_map_vX ... */ +}; + +struct gp100_vmm_map_v0 { + /* nvif_vmm_map_vX ... */ + __u8 version; + __u8 vol; + __u8 ro; + __u8 priv; + __u8 kind; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/ioctl.h b/drivers/gpu/drm/nouveau/include/nvif/ioctl.h index c5f5eb83a594..b93d586a2304 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/ioctl.h +++ b/drivers/gpu/drm/nouveau/include/nvif/ioctl.h @@ -1,7 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_IOCTL_H__ #define __NVIF_IOCTL_H__ -#define NVIF_VERSION_LATEST 0x0000000000000000ULL +#define NVIF_VERSION_LATEST 0x0000000000000100ULL struct nvif_ioctl_v0 { __u8 version; @@ -83,9 +84,13 @@ struct nvif_ioctl_wr_v0 { struct nvif_ioctl_map_v0 { /* nvif_ioctl ... 
*/ __u8 version; - __u8 pad01[3]; - __u32 length; +#define NVIF_IOCTL_MAP_V0_IO 0x00 +#define NVIF_IOCTL_MAP_V0_VA 0x01 + __u8 type; + __u8 pad02[6]; __u64 handle; + __u64 length; + __u8 data[]; }; struct nvif_ioctl_unmap { diff --git a/drivers/gpu/drm/nouveau/include/nvif/mem.h b/drivers/gpu/drm/nouveau/include/nvif/mem.h new file mode 100644 index 000000000000..b542fe38398e --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/mem.h @@ -0,0 +1,18 @@ +#ifndef __NVIF_MEM_H__ +#define __NVIF_MEM_H__ +#include "mmu.h" + +struct nvif_mem { + struct nvif_object object; + u8 type; + u8 page; + u64 addr; + u64 size; +}; + +int nvif_mem_init_type(struct nvif_mmu *mmu, s32 oclass, int type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *); +int nvif_mem_init(struct nvif_mmu *mmu, s32 oclass, u8 type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *); +void nvif_mem_fini(struct nvif_mem *); +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/mmu.h b/drivers/gpu/drm/nouveau/include/nvif/mmu.h new file mode 100644 index 000000000000..c8cd5b5b0688 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/mmu.h @@ -0,0 +1,56 @@ +#ifndef __NVIF_MMU_H__ +#define __NVIF_MMU_H__ +#include <nvif/object.h> + +struct nvif_mmu { + struct nvif_object object; + u8 dmabits; + u8 heap_nr; + u8 type_nr; + u16 kind_nr; + + struct { + u64 size; + } *heap; + + struct { +#define NVIF_MEM_VRAM 0x01 +#define NVIF_MEM_HOST 0x02 +#define NVIF_MEM_COMP 0x04 +#define NVIF_MEM_DISP 0x08 +#define NVIF_MEM_KIND 0x10 +#define NVIF_MEM_MAPPABLE 0x20 +#define NVIF_MEM_COHERENT 0x40 +#define NVIF_MEM_UNCACHED 0x80 + u8 type; + u8 heap; + } *type; + + u8 *kind; +}; + +int nvif_mmu_init(struct nvif_object *, s32 oclass, struct nvif_mmu *); +void nvif_mmu_fini(struct nvif_mmu *); + +static inline bool +nvif_mmu_kind_valid(struct nvif_mmu *mmu, u8 kind) +{ + const u8 invalid = mmu->kind_nr - 1; + if (kind) { + if (kind >= mmu->kind_nr || mmu->kind[kind] == invalid) + return false; + } + return true; +} + +static inline int +nvif_mmu_type(struct nvif_mmu *mmu, u8 mask) +{ + int i; + for (i = 0; i < mmu->type_nr; i++) { + if ((mmu->type[i].type & mask) == mask) + return i; + } + return -EINVAL; +} +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/notify.h b/drivers/gpu/drm/nouveau/include/nvif/notify.h index 51e2eb580809..4ed169230657 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/notify.h +++ b/drivers/gpu/drm/nouveau/include/nvif/notify.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_NOTIFY_H__ #define __NVIF_NOTIFY_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/object.h b/drivers/gpu/drm/nouveau/include/nvif/object.h index 9e58b305b020..a2d5244ff2b7 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/object.h +++ b/drivers/gpu/drm/nouveau/include/nvif/object.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_OBJECT_H__ #define __NVIF_OBJECT_H__ @@ -16,7 +17,7 @@ struct nvif_object { void *priv; /*XXX: hack */ struct { void __iomem *ptr; - u32 size; + u64 size; } map; }; @@ -29,7 +30,10 @@ void nvif_object_sclass_put(struct nvif_sclass **); u32 nvif_object_rd(struct nvif_object *, int, u64); void nvif_object_wr(struct nvif_object *, int, u64, u32); int nvif_object_mthd(struct nvif_object *, u32, void *, u32); -int nvif_object_map(struct nvif_object *); +int nvif_object_map_handle(struct nvif_object *, void *, u32, + u64 *handle, u64 *length); +void nvif_object_unmap_handle(struct nvif_object *); +int nvif_object_map(struct nvif_object *, 
void *, u32); void nvif_object_unmap(struct nvif_object *); #define nvif_handle(a) (unsigned long)(void *)(a) diff --git a/drivers/gpu/drm/nouveau/include/nvif/os.h b/drivers/gpu/drm/nouveau/include/nvif/os.h index 9fcab67c8557..fd09b2842972 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/os.h +++ b/drivers/gpu/drm/nouveau/include/nvif/os.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_OS_H__ #define __NOUVEAU_OS_H__ @@ -33,18 +34,4 @@ #include <soc/tegra/fuse.h> #include <soc/tegra/pmc.h> - -#ifndef ioread32_native -#ifdef __BIG_ENDIAN -#define ioread16_native ioread16be -#define iowrite16_native iowrite16be -#define ioread32_native ioread32be -#define iowrite32_native iowrite32be -#else /* def __BIG_ENDIAN */ -#define ioread16_native ioread16 -#define iowrite16_native iowrite16 -#define ioread32_native ioread32 -#define iowrite32_native iowrite32 -#endif /* def __BIG_ENDIAN else */ -#endif /* !ioread32_native */ #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/unpack.h b/drivers/gpu/drm/nouveau/include/nvif/unpack.h index 751bcf4930a7..7f0d9f6cc1e7 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/unpack.h +++ b/drivers/gpu/drm/nouveau/include/nvif/unpack.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVIF_UNPACK_H__ #define __NVIF_UNPACK_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h b/drivers/gpu/drm/nouveau/include/nvif/vmm.h new file mode 100644 index 000000000000..c5db8a2e82df --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h @@ -0,0 +1,42 @@ +#ifndef __NVIF_VMM_H__ +#define __NVIF_VMM_H__ +#include <nvif/object.h> +struct nvif_mem; +struct nvif_mmu; + +enum nvif_vmm_get { + ADDR, + PTES, + LAZY +}; + +struct nvif_vma { + u64 addr; + u64 size; +}; + +struct nvif_vmm { + struct nvif_object object; + u64 start; + u64 limit; + + struct { + u8 shift; + bool sparse:1; + bool vram:1; + bool host:1; + bool comp:1; + } *page; + int page_nr; +}; + +int nvif_vmm_init(struct nvif_mmu *, s32 oclass, u64 addr, u64 size, + void *argv, u32 argc, struct nvif_vmm *); +void nvif_vmm_fini(struct nvif_vmm *); +int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse, + u8 page, u8 align, u64 size, struct nvif_vma *); +void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *); +int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc, + struct nvif_mem *, u64 offset); +int nvif_vmm_unmap(struct nvif_vmm *, u64); +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h index e876634da10a..757fac823a10 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CLIENT_H__ #define __NVKM_CLIENT_H__ #define nvkm_client(p) container_of((p), struct nvkm_client, object) @@ -16,7 +17,8 @@ struct nvkm_client { void *data; int (*ntfy)(const void *, u32, const void *, u32); - struct nvkm_vm *vm; + struct list_head umem; + spinlock_t lock; }; int nvkm_client_new(const char *name, u64 device, const char *cfg, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h b/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h index c59fd4e2ad5e..966d1822dd80 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/debug.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEBUG_H__ #define __NVKM_DEBUG_H__ #define NV_DBG_FATAL 0 diff --git 
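For illustration only (not part of the patch): the new nvif_mmu, nvif_mem and nvif_vmm interfaces above replace the old client-side nouveau_mem/nvkm_vm paths. A minimal sketch of how a caller might tie them together follows; the mem_oclass value and the already-initialised mmu/vmm objects are assumptions supplied by the caller, since the chip-specific classes are not defined in this hunk.

#include <nvif/mem.h>
#include <nvif/mmu.h>
#include <nvif/vmm.h>

/* Sketch only: allocate CPU-mappable VRAM and back a VMA with it.
 * mem_oclass and the initialised mmu/vmm objects are assumed to be
 * provided by the caller.
 */
static int
sketch_vram_bind(struct nvif_mmu *mmu, struct nvif_vmm *vmm,
		 s32 mem_oclass, u64 size, struct nvif_mem *mem,
		 struct nvif_vma *vma)
{
	int type, ret;

	/* find a memory type that is both VRAM and CPU-mappable */
	type = nvif_mmu_type(mmu, NVIF_MEM_VRAM | NVIF_MEM_MAPPABLE);
	if (type < 0)
		return type;

	ret = nvif_mem_init_type(mmu, mem_oclass, type, 0, size, NULL, 0, mem);
	if (ret)
		return ret;

	/* carve out address space, then map the allocation into it */
	ret = nvif_vmm_get(vmm, LAZY, false, mem->page, 0, mem->size, vma);
	if (ret)
		goto done_mem;

	ret = nvif_vmm_map(vmm, vma->addr, vma->size, NULL, 0, mem, 0);
	if (ret)
		goto done_vma;
	return 0;

done_vma:
	nvif_vmm_put(vmm, vma);
done_mem:
	nvif_mem_fini(mem);
	return ret;
}
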
a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index bb4c214f1046..560265b15ec2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -1,7 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVICE_H__ #define __NVKM_DEVICE_H__ +#include <core/oclass.h> #include <core/event.h> -#include <core/object.h> enum nvkm_devidx { NVKM_SUBDEV_PCI, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h index d4cd2fbfde88..ebf8473a39fe 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_ENGINE_H__ #define __NVKM_ENGINE_H__ #define nvkm_engine(p) container_of((p), struct nvkm_engine, subdev) @@ -15,6 +16,7 @@ struct nvkm_engine { struct nvkm_engine_func { void *(*dtor)(struct nvkm_engine *); + void (*preinit)(struct nvkm_engine *); int (*oneinit)(struct nvkm_engine *); int (*init)(struct nvkm_engine *); int (*fini)(struct nvkm_engine *, bool suspend); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/enum.h b/drivers/gpu/drm/nouveau/include/nvkm/core/enum.h index 40429a82f792..38acbde2de4f 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/enum.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/enum.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_ENUM_H__ #define __NVKM_ENUM_H__ #include <core/os.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h index b98fe2de546a..d3c45e90a1c1 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_EVENT_H__ #define __NVKM_EVENT_H__ #include <core/os.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h b/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h index a626ce378f04..ff0fa38aee72 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FIRMWARE_H__ #define __NVKM_FIRMWARE_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h b/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h index c23da4f05929..10eeaeebc242 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h @@ -1,17 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GPUOBJ_H__ #define __NVKM_GPUOBJ_H__ -#include <core/object.h> #include <core/memory.h> #include <core/mm.h> -struct nvkm_vma; -struct nvkm_vm; #define NVOBJ_FLAG_ZERO_ALLOC 0x00000001 #define NVOBJ_FLAG_HEAP 0x00000004 struct nvkm_gpuobj { - struct nvkm_object object; - const struct nvkm_gpuobj_func *func; + union { + const struct nvkm_gpuobj_func *func; + const struct nvkm_gpuobj_func *ptrs; + }; struct nvkm_gpuobj *parent; struct nvkm_memory *memory; struct nvkm_mm_node *node; @@ -28,15 +28,14 @@ struct nvkm_gpuobj_func { void (*release)(struct nvkm_gpuobj *); u32 (*rd32)(struct nvkm_gpuobj *, u32 offset); void (*wr32)(struct nvkm_gpuobj *, u32 offset, u32 data); + int (*map)(struct nvkm_gpuobj *, u64 offset, struct nvkm_vmm *, + struct nvkm_vma *, void *argv, u32 argc); }; int nvkm_gpuobj_new(struct nvkm_device *, u32 size, int 
align, bool zero, struct nvkm_gpuobj *parent, struct nvkm_gpuobj **); void nvkm_gpuobj_del(struct nvkm_gpuobj **); int nvkm_gpuobj_wrap(struct nvkm_memory *, struct nvkm_gpuobj **); -int nvkm_gpuobj_map(struct nvkm_gpuobj *, struct nvkm_vm *, u32 access, - struct nvkm_vma *); -void nvkm_gpuobj_unmap(struct nvkm_vma *); void nvkm_gpuobj_memcpy_to(struct nvkm_gpuobj *dst, u32 dstoffset, void *src, u32 length); void nvkm_gpuobj_memcpy_from(void *dst, struct nvkm_gpuobj *src, u32 srcoffset, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h b/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h index 88971eb37afa..e2d39192fa26 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_IOCTL_H__ #define __NVKM_IOCTL_H__ #include <core/os.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h index 33ca6769266a..05f505de0075 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h @@ -1,9 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MEMORY_H__ #define __NVKM_MEMORY_H__ #include <core/os.h> struct nvkm_device; struct nvkm_vma; -struct nvkm_vm; +struct nvkm_vmm; + +struct nvkm_tags { + struct nvkm_mm_node *mn; + refcount_t refcount; +}; enum nvkm_memory_target { NVKM_MEM_TARGET_INST, /* instance memory */ @@ -14,41 +20,84 @@ enum nvkm_memory_target { struct nvkm_memory { const struct nvkm_memory_func *func; + const struct nvkm_memory_ptrs *ptrs; + struct kref kref; + struct nvkm_tags *tags; }; struct nvkm_memory_func { void *(*dtor)(struct nvkm_memory *); enum nvkm_memory_target (*target)(struct nvkm_memory *); + u8 (*page)(struct nvkm_memory *); u64 (*addr)(struct nvkm_memory *); u64 (*size)(struct nvkm_memory *); - void (*boot)(struct nvkm_memory *, struct nvkm_vm *); + void (*boot)(struct nvkm_memory *, struct nvkm_vmm *); void __iomem *(*acquire)(struct nvkm_memory *); void (*release)(struct nvkm_memory *); + int (*map)(struct nvkm_memory *, u64 offset, struct nvkm_vmm *, + struct nvkm_vma *, void *argv, u32 argc); +}; + +struct nvkm_memory_ptrs { u32 (*rd32)(struct nvkm_memory *, u64 offset); void (*wr32)(struct nvkm_memory *, u64 offset, u32 data); - void (*map)(struct nvkm_memory *, struct nvkm_vma *, u64 offset); }; void nvkm_memory_ctor(const struct nvkm_memory_func *, struct nvkm_memory *); int nvkm_memory_new(struct nvkm_device *, enum nvkm_memory_target, u64 size, u32 align, bool zero, struct nvkm_memory **); -void nvkm_memory_del(struct nvkm_memory **); +struct nvkm_memory *nvkm_memory_ref(struct nvkm_memory *); +void nvkm_memory_unref(struct nvkm_memory **); +int nvkm_memory_tags_get(struct nvkm_memory *, struct nvkm_device *, u32 tags, + void (*clear)(struct nvkm_device *, u32, u32), + struct nvkm_tags **); +void nvkm_memory_tags_put(struct nvkm_memory *, struct nvkm_device *, + struct nvkm_tags **); + #define nvkm_memory_target(p) (p)->func->target(p) +#define nvkm_memory_page(p) (p)->func->page(p) #define nvkm_memory_addr(p) (p)->func->addr(p) #define nvkm_memory_size(p) (p)->func->size(p) #define nvkm_memory_boot(p,v) (p)->func->boot((p),(v)) -#define nvkm_memory_map(p,v,o) (p)->func->map((p),(v),(o)) +#define nvkm_memory_map(p,o,vm,va,av,ac) \ + (p)->func->map((p),(o),(vm),(va),(av),(ac)) /* accessor macros - kmap()/done() must bracket use of the other accessor * macros to guarantee correct 
behaviour across all chipsets */ #define nvkm_kmap(o) (o)->func->acquire(o) -#define nvkm_ro32(o,a) (o)->func->rd32((o), (a)) -#define nvkm_wo32(o,a,d) (o)->func->wr32((o), (a), (d)) +#define nvkm_done(o) (o)->func->release(o) + +#define nvkm_ro32(o,a) (o)->ptrs->rd32((o), (a)) +#define nvkm_wo32(o,a,d) (o)->ptrs->wr32((o), (a), (d)) #define nvkm_mo32(o,a,m,d) ({ \ u32 _addr = (a), _data = nvkm_ro32((o), _addr); \ nvkm_wo32((o), _addr, (_data & ~(m)) | (d)); \ _data; \ }) -#define nvkm_done(o) (o)->func->release(o) + +#define nvkm_wo64(o,a,d) do { \ + u64 __a = (a), __d = (d); \ + nvkm_wo32((o), __a + 0, lower_32_bits(__d)); \ + nvkm_wo32((o), __a + 4, upper_32_bits(__d)); \ +} while(0) + +#define nvkm_fill(t,s,o,a,d,c) do { \ + u64 _a = (a), _c = (c), _d = (d), _o = _a >> s, _s = _c << s; \ + u##t __iomem *_m = nvkm_kmap(o); \ + if (likely(_m)) { \ + if (_d) { \ + while (_c--) \ + iowrite##t##_native(_d, &_m[_o++]); \ + } else { \ + memset_io(&_m[_o], _d, _s); \ + } \ + } else { \ + for (; _c; _c--, _a += BIT(s)) \ + nvkm_wo##t((o), _a, _d); \ + } \ + nvkm_done(o); \ +} while(0) +#define nvkm_fo32(o,a,d,c) nvkm_fill(32, 2, (o), (a), (d), (c)) +#define nvkm_fo64(o,a,d,c) nvkm_fill(64, 3, (o), (a), (d), (c)) #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h index 7bd4897a8a2a..b0726c39429e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MM_H__ #define __NVKM_MM_H__ #include <core/os.h> @@ -30,7 +31,7 @@ nvkm_mm_initialised(struct nvkm_mm *mm) return mm->heap_nodes; } -int nvkm_mm_init(struct nvkm_mm *, u32 offset, u32 length, u32 block); +int nvkm_mm_init(struct nvkm_mm *, u8 heap, u32 offset, u32 length, u32 block); int nvkm_mm_fini(struct nvkm_mm *); int nvkm_mm_head(struct nvkm_mm *, u8 heap, u8 type, u32 size_max, u32 size_min, u32 align, struct nvkm_mm_node **); @@ -39,9 +40,39 @@ int nvkm_mm_tail(struct nvkm_mm *, u8 heap, u8 type, u32 size_max, void nvkm_mm_free(struct nvkm_mm *, struct nvkm_mm_node **); void nvkm_mm_dump(struct nvkm_mm *, const char *); +static inline u32 +nvkm_mm_heap_size(struct nvkm_mm *mm, u8 heap) +{ + struct nvkm_mm_node *node; + u32 size = 0; + list_for_each_entry(node, &mm->nodes, nl_entry) { + if (node->heap == heap) + size += node->length; + } + return size; +} + static inline bool nvkm_mm_contiguous(struct nvkm_mm_node *node) { return !node->next; } + +static inline u32 +nvkm_mm_addr(struct nvkm_mm_node *node) +{ + if (WARN_ON(!nvkm_mm_contiguous(node))) + return 0; + return node->offset; +} + +static inline u32 +nvkm_mm_size(struct nvkm_mm_node *node) +{ + u32 size = 0; + do { + size += node->length; + } while ((node = node->next)); + return size; +} #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/notify.h b/drivers/gpu/drm/nouveau/include/nvkm/core/notify.h index 753d08c1767b..4eb82bc563f3 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/notify.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/notify.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_NOTIFY_H__ #define __NVKM_NOTIFY_H__ #include <core/os.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h index 96dda350ada3..270f893cc154 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h @@ -1,10 +1,9 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_OBJECT_H__ #define __NVKM_OBJECT_H__ -#include <core/os.h> -#include <core/debug.h> +#include <core/oclass.h> struct nvkm_event; struct nvkm_gpuobj; -struct nvkm_oclass; struct nvkm_object { const struct nvkm_object_func *func; @@ -21,13 +20,20 @@ struct nvkm_object { struct rb_node node; }; +enum nvkm_object_map { + NVKM_OBJECT_MAP_IO, + NVKM_OBJECT_MAP_VA +}; + struct nvkm_object_func { void *(*dtor)(struct nvkm_object *); int (*init)(struct nvkm_object *); int (*fini)(struct nvkm_object *, bool suspend); int (*mthd)(struct nvkm_object *, u32 mthd, void *data, u32 size); int (*ntfy)(struct nvkm_object *, u32 mthd, struct nvkm_event **); - int (*map)(struct nvkm_object *, u64 *addr, u32 *size); + int (*map)(struct nvkm_object *, void *argv, u32 argc, + enum nvkm_object_map *, u64 *addr, u64 *size); + int (*unmap)(struct nvkm_object *); int (*rd08)(struct nvkm_object *, u64 addr, u8 *data); int (*rd16)(struct nvkm_object *, u64 addr, u16 *data); int (*rd32)(struct nvkm_object *, u64 addr, u32 *data); @@ -52,7 +58,9 @@ int nvkm_object_init(struct nvkm_object *); int nvkm_object_fini(struct nvkm_object *, bool suspend); int nvkm_object_mthd(struct nvkm_object *, u32 mthd, void *data, u32 size); int nvkm_object_ntfy(struct nvkm_object *, u32 mthd, struct nvkm_event **); -int nvkm_object_map(struct nvkm_object *, u64 *addr, u32 *size); +int nvkm_object_map(struct nvkm_object *, void *argv, u32 argc, + enum nvkm_object_map *, u64 *addr, u64 *size); +int nvkm_object_unmap(struct nvkm_object *); int nvkm_object_rd08(struct nvkm_object *, u64 addr, u8 *data); int nvkm_object_rd16(struct nvkm_object *, u64 addr, u16 *data); int nvkm_object_rd32(struct nvkm_object *, u64 addr, u32 *data); @@ -66,28 +74,4 @@ bool nvkm_object_insert(struct nvkm_object *); void nvkm_object_remove(struct nvkm_object *); struct nvkm_object *nvkm_object_search(struct nvkm_client *, u64 object, const struct nvkm_object_func *); - -struct nvkm_sclass { - int minver; - int maxver; - s32 oclass; - const struct nvkm_object_func *func; - int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); -}; - -struct nvkm_oclass { - int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); - struct nvkm_sclass base; - const void *priv; - const void *engn; - u32 handle; - u8 route; - u64 token; - u64 object; - struct nvkm_client *client; - struct nvkm_object *parent; - struct nvkm_engine *engine; -}; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h b/drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h new file mode 100644 index 000000000000..8e1b945d38f3 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h @@ -0,0 +1,31 @@ +#ifndef __NVKM_OCLASS_H__ +#define __NVKM_OCLASS_H__ +#include <core/os.h> +#include <core/debug.h> +struct nvkm_oclass; +struct nvkm_object; + +struct nvkm_sclass { + int minver; + int maxver; + s32 oclass; + const struct nvkm_object_func *func; + int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, + struct nvkm_object **); +}; + +struct nvkm_oclass { + int (*ctor)(const struct nvkm_oclass *, void *data, u32 size, + struct nvkm_object **); + struct nvkm_sclass base; + const void *priv; + const void *engn; + u32 handle; + u8 route; + u64 token; + u64 object; + struct nvkm_client *client; + struct nvkm_object *parent; + struct nvkm_engine *engine; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/oproxy.h 
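For illustration only (not part of the patch): the reworked nvkm_memory interface above splits pointer-style access (ptrs->rd32/wr32) from the new map() path and adds 64-bit and fill helpers. A short sketch of the nvkm_kmap()/nvkm_done() bracketing that the accessor-macro comment requires, assuming the memory object was allocated elsewhere:

#include <core/memory.h>

/* Sketch only: write a few words through the accessor macros.  The
 * nvkm_kmap()/nvkm_done() pair brackets nvkm_wo32()/nvkm_wo64();
 * nvkm_fo32() acquires and releases the mapping itself, so it sits
 * outside the bracket.
 */
static void
sketch_memory_write(struct nvkm_memory *memory)
{
	nvkm_kmap(memory);
	nvkm_wo32(memory, 0x00, 0x00000001);
	nvkm_wo64(memory, 0x08, 0x0000000200000003ULL);
	nvkm_done(memory);

	/* zero 0x40 dwords (0x100 bytes) starting at offset 0x200 */
	nvkm_fo32(memory, 0x200, 0x00000000, 0x40);
}
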
b/drivers/gpu/drm/nouveau/include/nvkm/core/oproxy.h index bd52236cc2f4..d950d5ee188b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/oproxy.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/oproxy.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_OPROXY_H__ #define __NVKM_OPROXY_H__ #define nvkm_oproxy(p) container_of((p), struct nvkm_oproxy, base) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/option.h b/drivers/gpu/drm/nouveau/include/nvkm/core/option.h index 80fdc146e816..a34a79bacbd0 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/option.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/option.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_OPTION_H__ #define __NVKM_OPTION_H__ #include <core/os.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h index cd57e238ddd3..445602d1e8d3 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h @@ -1,4 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_OS_H__ #define __NVKM_OS_H__ #include <nvif/os.h> + +#ifdef __BIG_ENDIAN +#define ioread16_native ioread16be +#define iowrite16_native iowrite16be +#define ioread32_native ioread32be +#define iowrite32_native iowrite32be +#else +#define ioread16_native ioread16 +#define iowrite16_native iowrite16 +#define ioread32_native ioread32 +#define iowrite32_native iowrite32 +#endif + +#define iowrite64_native(v,p) do { \ + u32 __iomem *_p = (u32 __iomem *)(p); \ + u64 _v = (v); \ + iowrite32_native(lower_32_bits(_v), &_p[0]); \ + iowrite32_native(upper_32_bits(_v), &_p[1]); \ +} while(0) #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h index 78d41be20b8c..4c7f647d2dc9 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVICE_PCI_H__ #define __NVKM_DEVICE_PCI_H__ #include <core/device.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h b/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h index 5ee6298991e2..d5d789663aca 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_RAMHT_H__ #define __NVKM_RAMHT_H__ #include <core/gpuobj.h> +struct nvkm_object; struct nvkm_ramht_data { struct nvkm_gpuobj *inst; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h index ca9ed3d68f44..63df2290177f 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SUBDEV_H__ #define __NVKM_SUBDEV_H__ #include <core/device.h> @@ -33,7 +34,7 @@ void nvkm_subdev_intr(struct nvkm_subdev *); /* subdev logging */ #define nvkm_printk_(s,l,p,f,a...) 
do { \ const struct nvkm_subdev *_subdev = (s); \ - if (_subdev->debug >= (l)) { \ + if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l)) { \ dev_##p(_subdev->device->dev, "%s: "f, \ nvkm_subdev_name[_subdev->index], ##a); \ } \ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h index 7c7d91cad09a..5c102d0206a7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVICE_TEGRA_H__ #define __NVKM_DEVICE_TEGRA_H__ #include <core/device.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/bsp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/bsp.h index 904820558fc0..40613983fccb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/bsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/bsp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BSP_H__ #define __NVKM_BSP_H__ #include <engine/xtensa.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h index b93f4c1a95e5..553245994450 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CE_H__ #define __NVKM_CE_H__ #include <engine/falcon.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/cipher.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/cipher.h index 03fa57a7c30a..72b9da2de7c2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/cipher.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/cipher.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CIPHER_H__ #define __NVKM_CIPHER_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h index 05f9c13ab8c3..e83193d3ccab 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_H__ #define __NVKM_DISP_H__ #define nvkm_disp(p) container_of((p), struct nvkm_disp, engine) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h index d2a6532ce3b9..0f9c1c702ed6 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DMA_H__ #define __NVKM_DMA_H__ #include <core/engine.h> +#include <core/object.h> struct nvkm_client; struct nvkm_dmaobj { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h index e1a854e2ade1..6427747b6f77 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h @@ -1,8 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FALCON_H__ #define __NVKM_FALCON_H__ #define nvkm_falcon(p) container_of((p), struct nvkm_falcon, engine) #include <core/engine.h> struct nvkm_fifo_chan; +struct nvkm_gpuobj; enum nvkm_falcon_dmaidx { FALCON_DMAIDX_UCODE = 0, @@ -77,7 +79,7 @@ struct nvkm_falcon_func { void (*load_imem)(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool); void (*load_dmem)(struct nvkm_falcon *, void *, u32, u32, u8); void (*read_dmem)(struct nvkm_falcon *, u32, u32, 
u8, void *); - void (*bind_context)(struct nvkm_falcon *, struct nvkm_gpuobj *); + void (*bind_context)(struct nvkm_falcon *, struct nvkm_memory *); int (*wait_for_halt)(struct nvkm_falcon *, u32); int (*clear_interrupt)(struct nvkm_falcon *, u32); void (*set_start_addr)(struct nvkm_falcon *, u32 start_addr); @@ -112,7 +114,7 @@ void nvkm_falcon_load_imem(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool); void nvkm_falcon_load_dmem(struct nvkm_falcon *, void *, u32, u32, u8); void nvkm_falcon_read_dmem(struct nvkm_falcon *, u32, u32, u8, void *); -void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_gpuobj *); +void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_memory *); void nvkm_falcon_set_start_addr(struct nvkm_falcon *, u32); void nvkm_falcon_start(struct nvkm_falcon *); int nvkm_falcon_wait_for_halt(struct nvkm_falcon *, u32); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h index f00527b36acc..c17b3a9bf8fb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FIFO_H__ #define __NVKM_FIFO_H__ #include <core/engine.h> +#include <core/object.h> #include <core/event.h> #define NVKM_FIFO_CHID_NR 4096 @@ -21,7 +23,7 @@ struct nvkm_fifo_chan { u16 chid; struct nvkm_gpuobj *inst; struct nvkm_gpuobj *push; - struct nvkm_vm *vm; + struct nvkm_vmm *vmm; void __iomem *user; u64 addr; u32 size; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h index c7944b19bed8..fb18f105fc43 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GR_H__ #define __NVKM_GR_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/mpeg.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/mpeg.h index 257738eff9f6..4ef3d4c5e358 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/mpeg.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/mpeg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MPEG_H__ #define __NVKM_MPEG_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h index 748ea9b7e559..985fc9490643 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MSENC_H__ #define __NVKM_MSENC_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/mspdec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/mspdec.h index 08516ca82e04..e03f33472486 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/mspdec.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/mspdec.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MSPDEC_H__ #define __NVKM_MSPDEC_H__ #include <engine/falcon.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/msppp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/msppp.h index 85fd306021ac..760bf17ea63d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/msppp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/msppp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MSPPP_H__ #define __NVKM_MSPPP_H__ #include 
<engine/falcon.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/msvld.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/msvld.h index 99757ed96f76..281866d2501d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/msvld.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/msvld.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MSVLD_H__ #define __NVKM_MSVLD_H__ #include <engine/falcon.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h index 00b2b227ff41..fe716859d4a9 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_NVDEC_H__ #define __NVKM_NVDEC_H__ #define nvkm_nvdec(p) container_of((p), struct nvkm_nvdec, engine) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h index 8a819328059b..cdd68a8bab8b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_NVENC_H__ #define __NVKM_NVENC_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/pm.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/pm.h index 240855ad8c8d..6cce8502f9df 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/pm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/pm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PM_H__ #define __NVKM_PM_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec.h index 7317ef4c0207..b206b918c43e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SEC_H__ #define __NVKM_SEC_H__ #include <engine/falcon.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h index d3db1b1e75c4..f7d89822b905 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SEC2_H__ #define __NVKM_SEC2_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sw.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sw.h index 096e7dbd1e65..83a17c4e11e7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/sw.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sw.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SW_H__ #define __NVKM_SW_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h index 2b0dc4c695c2..9b7d4877cf41 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_VIC_H__ #define __NVKM_VIC_H__ #include <core/engine.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/vp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/vp.h index 616ea91e03f8..53bf8aed48fb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/vp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/vp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: 
GPL-2.0 */ #ifndef __NVKM_VP_H__ #define __NVKM_VP_H__ #include <engine/xtensa.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/xtensa.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/xtensa.h index b1fcc416732f..13c00ce6d556 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/xtensa.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/xtensa.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_XTENSA_H__ #define __NVKM_XTENSA_H__ #define nvkm_xtensa(p) container_of((p), struct nvkm_xtensa, engine) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h index d3071b5a4f98..f6bd94c7e0f7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BAR_H__ #define __NVKM_BAR_H__ #include <core/subdev.h> @@ -8,17 +9,22 @@ struct nvkm_bar { struct nvkm_subdev subdev; spinlock_t lock; + bool bar2; /* whether the BAR supports to be ioremapped WC or should be uncached */ bool iomap_uncached; }; +struct nvkm_vmm *nvkm_bar_bar1_vmm(struct nvkm_device *); +void nvkm_bar_bar2_init(struct nvkm_device *); +void nvkm_bar_bar2_fini(struct nvkm_device *); +struct nvkm_vmm *nvkm_bar_bar2_vmm(struct nvkm_device *); void nvkm_bar_flush(struct nvkm_bar *); -struct nvkm_vm *nvkm_bar_kmap(struct nvkm_bar *); -int nvkm_bar_umap(struct nvkm_bar *, u64 size, int type, struct nvkm_vma *); int nv50_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int g84_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gf100_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gk20a_bar_new(struct nvkm_device *, int, struct nvkm_bar **); +int gm107_bar_new(struct nvkm_device *, int, struct nvkm_bar **); +int gm20b_bar_new(struct nvkm_device *, int, struct nvkm_bar **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h index a72f3290528a..979e9a144e7b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BIOS_H__ #define __NVKM_BIOS_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h index cf202c793a1d..703a5b524b96 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_M0203_H__ #define __NVBIOS_M0203_H__ struct nvbios_M0203T { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0205.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0205.h index d34608ff241e..b4e14e45a0e8 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0205.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0205.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_M0205_H__ #define __NVBIOS_M0205_H__ struct nvbios_M0205T { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0209.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0209.h index c7ff8d9526e7..c09376894d12 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0209.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0209.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_M0209_H__ #define 
__NVBIOS_M0209_H__ u32 nvbios_M0209Te(struct nvkm_bios *, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/P0260.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/P0260.h index 1c1c52eac97d..901d94ef11b8 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/P0260.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/P0260.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_P0260_H__ #define __NVBIOS_P0260_H__ u32 nvbios_P0260Te(struct nvkm_bios *, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bit.h index 6711732b7cb1..d068586f3263 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bit.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_BIT_H__ #define __NVBIOS_BIT_H__ struct bit_entry { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bmp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bmp.h index 3f0c7c414026..9a3f9483ee75 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bmp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bmp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_BMP_H__ #define __NVBIOS_BMP_H__ static inline u16 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h index 2ff64a20c0ec..a1c48c6b223b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_BOOST_H__ #define __NVBIOS_BOOST_H__ u32 nvbios_boostTe(struct nvkm_bios *, u8 *, u8 *, u8 *, u8 *, u8 *, u8 *); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h index deb477282dde..ed9e0a6a0011 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_CONN_H__ #define __NVBIOS_CONN_H__ enum dcb_connector_type { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h index 76fe7d50a1ce..49343d276e11 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_CSTEP_H__ #define __NVBIOS_CSTEP_H__ u32 nvbios_cstepTe(struct nvkm_bios *, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dcb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dcb.h index 903d117603d8..63ddc6ed897a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dcb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dcb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_DCB_H__ #define __NVBIOS_DCB_H__ enum dcb_output_type { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h index c5a6ebd5a478..423d92de0aae 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_DISP_H__ #define __NVBIOS_DISP_H__ u16 nvbios_disp_table(struct nvkm_bios *, diff --git 
a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h index b4d39df70d4e..df34b41838d6 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_DP_H__ #define __NVBIOS_DP_H__ struct nvbios_dpout { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h index bb49bd5f879e..f93e4f951f2f 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_EXTDEV_H__ #define __NVBIOS_EXTDEV_H__ enum nvbios_extdev_type { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h index a7513e8406a3..09c1d3b9d009 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_FAN_H__ #define __NVBIOS_FAN_H__ #include <subdev/bios/therm.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h index b7a54e605469..b71a3555c64e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_GPIO_H__ #define __NVBIOS_GPIO_H__ enum dcb_gpio_func_name { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/i2c.h index 85c529ecf9b1..ae1f7483dd28 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/i2c.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/i2c.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_I2C_H__ #define __NVBIOS_I2C_H__ enum dcb_i2c_type { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h index e933d3eede70..e220a1ac1387 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_ICCSENSE_H__ #define __NVBIOS_ICCSENSE_H__ struct pwr_rail_resistor_t { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/image.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/image.h index e15d63b9a5eb..893288b060de 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/image.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/image.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_IMAGE_H__ #define __NVBIOS_IMAGE_H__ struct nvbios_image { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/init.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/init.h index 06ab48052128..744b1868e789 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/init.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/init.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_INIT_H__ #define __NVBIOS_INIT_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/mxm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/mxm.h index 4e31b64c5edf..327bf9c4b703 100644 --- 
a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/mxm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/mxm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_MXM_H__ #define __NVBIOS_MXM_H__ u16 mxm_table(struct nvkm_bios *, u8 *ver, u8 *hdr); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/npde.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/npde.h index 64a59549b7ea..ee5419b7b45b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/npde.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/npde.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_NPDE_H__ #define __NVBIOS_NPDE_H__ struct nvbios_npdeT { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pcir.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pcir.h index e85931541f4f..1dffe8d6cc81 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pcir.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pcir.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_PCIR_H__ #define __NVBIOS_PCIR_H__ struct nvbios_pcirT { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h index 478b1c0d2089..0ee84ea6d737 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_PERF_H__ #define __NVBIOS_PERF_H__ u32 nvbios_perf_table(struct nvkm_bios *, u8 *ver, u8 *hdr, diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pll.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pll.h index 5a69978d1e3b..ab964e085f02 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pll.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pll.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_PLL_H__ #define __NVBIOS_PLL_H__ /*XXX: kill me */ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h index 3a643df6de04..fb41ecab8f8c 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_PMU_H__ #define __NVBIOS_PMU_H__ struct nvbios_pmuT { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h index f5f4a14c4030..ff12d810dce3 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_POWER_BUDGET_H__ #define __NVBIOS_POWER_BUDGET_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h index dca6c060a24f..2b87a38adb7a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_RAMCFG_H__ #define __NVBIOS_RAMCFG_H__ struct nvbios_ramcfg { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/rammap.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/rammap.h index 8d8ee13721ec..471eef434b51 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/rammap.h +++ 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/rammap.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_RAMMAP_H__ #define __NVBIOS_RAMMAP_H__ #include <subdev/bios/ramcfg.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/therm.h index dd3ba960e75d..46a3b15e10ec 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/therm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_THERM_H__ #define __NVBIOS_THERM_H__ struct nvbios_therm_threshold { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h index 38188d4c9ab5..40ceabf37827 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_TIMING_H__ #define __NVBIOS_TIMING_H__ #include <subdev/bios/ramcfg.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h index bea31cdd1dd1..67419bad584c 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_VMAP_H__ #define __NVBIOS_VMAP_H__ struct nvbios_vmap { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h index f0baa2c7de09..6b36d5ecb8f9 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_VOLT_H__ #define __NVBIOS_VOLT_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h index 87f804fc3a88..36f3028d58ef 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_VPSTATE_H__ #define __NVBIOS_VPSTATE_H__ struct nvbios_vpstate_header { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/xpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/xpio.h index 0c0fe234ff12..d1bb5d044585 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/xpio.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/xpio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_XPIO_H__ #define __NVBIOS_XPIO_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h index 33a057c334f2..7695f7f77a06 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BUS_H__ #define __NVKM_BUS_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h index e5275f742977..15db75ef0189 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CLK_H__ #define __NVKM_CLK_H__ #include <core/subdev.h> diff --git 
a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h index 709d786f1808..40558064d589 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVINIT_H__ #define __NVKM_DEVINIT_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 28d513fbf44c..adb78f7d083a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -1,8 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FB_H__ #define __NVKM_FB_H__ #include <core/subdev.h> - -#include <subdev/mmu.h> +#include <core/mm.h> /* memory type/access flags, do not match hardware values */ #define NV_MEM_ACCESS_RO 1 @@ -21,22 +21,6 @@ #define NVKM_RAM_TYPE_VM 0x7f #define NV_MEM_COMP_VM 0x03 -struct nvkm_mem { - struct drm_device *dev; - - struct nvkm_vma bar_vma; - struct nvkm_vma vma[2]; - u8 page_shift; - - struct nvkm_mm_node *tag; - struct nvkm_mm_node *mem; - dma_addr_t *pages; - u32 memtype; - u64 offset; - u64 size; - struct sg_table *sg; -}; - struct nvkm_fb_tile { struct nvkm_mm_node *tag; u32 addr; @@ -50,6 +34,7 @@ struct nvkm_fb { struct nvkm_subdev subdev; struct nvkm_ram *ram; + struct nvkm_mm tags; struct { struct nvkm_fb_tile region[16]; @@ -62,7 +47,6 @@ struct nvkm_fb { struct nvkm_memory *mmu_wr; }; -bool nvkm_fb_memtype_valid(struct nvkm_fb *, u32 memtype); void nvkm_fb_tile_init(struct nvkm_fb *, int region, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); void nvkm_fb_tile_fini(struct nvkm_fb *, int region, struct nvkm_fb_tile *); @@ -129,8 +113,11 @@ struct nvkm_ram { u64 size; #define NVKM_RAM_MM_SHIFT 12 +#define NVKM_RAM_MM_ANY (NVKM_MM_HEAP_ANY + 0) +#define NVKM_RAM_MM_NORMAL (NVKM_MM_HEAP_ANY + 1) +#define NVKM_RAM_MM_NOMAP (NVKM_MM_HEAP_ANY + 2) +#define NVKM_RAM_MM_MIXED (NVKM_MM_HEAP_ANY + 3) struct nvkm_mm vram; - struct nvkm_mm tags; u64 stolen; int ranks; @@ -147,6 +134,10 @@ struct nvkm_ram { struct nvkm_ram_data target; }; +int +nvkm_ram_get(struct nvkm_device *, u8 heap, u8 type, u8 page, u64 size, + bool contig, bool back, struct nvkm_memory **); + struct nvkm_ram_func { u64 upper; u32 (*probe_fbp)(const struct nvkm_ram_func *, struct nvkm_device *, @@ -157,14 +148,8 @@ struct nvkm_ram_func { void *(*dtor)(struct nvkm_ram *); int (*init)(struct nvkm_ram *); - int (*get)(struct nvkm_ram *, u64 size, u32 align, u32 size_nc, - u32 type, struct nvkm_mem **); - void (*put)(struct nvkm_ram *, struct nvkm_mem **); - int (*calc)(struct nvkm_ram *, u32 freq); int (*prog)(struct nvkm_ram *); void (*tidy)(struct nvkm_ram *); }; - -extern const u8 gf100_pte_storage_type_map[256]; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fuse.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fuse.h index ae201e388487..092193b7f98e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fuse.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fuse.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FUSE_H__ #define __NVKM_FUSE_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h index 9b9c6d2f90b6..ee54899076e3 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h +++ 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GPIO_H__ #define __NVKM_GPIO_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h index ce23cc6c672e..eef54e9b5d77 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_I2C_H__ #define __NVKM_I2C_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h index 6e2b70bd2f41..919653c1d101 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_IBUS_H__ #define __NVKM_IBUS_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h index b7a9b041e130..be9475cd94fd 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_ICCSENSE_H__ #define __NVKM_ICCSENSE_H__ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h index 40f845e31272..36ed520ed2d0 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_INSTMEM_H__ #define __NVKM_INSTMEM_H__ #include <core/subdev.h> @@ -9,6 +10,7 @@ struct nvkm_instmem { spinlock_t lock; struct list_head list; + struct list_head boot; u32 reserved; struct nvkm_memory *vbios; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h index cd755baf9cab..95b611554d53 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_LTC_H__ #define __NVKM_LTC_H__ #include <core/subdev.h> @@ -14,8 +15,7 @@ struct nvkm_ltc { u32 num_tags; u32 tag_base; - struct nvkm_mm tags; - struct nvkm_mm_node *tag_ram; + struct nvkm_memory *tag_ram; int zbc_min; int zbc_max; @@ -23,9 +23,7 @@ struct nvkm_ltc { u32 zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; }; -int nvkm_ltc_tags_alloc(struct nvkm_ltc *, u32 count, struct nvkm_mm_node **); -void nvkm_ltc_tags_free(struct nvkm_ltc *, struct nvkm_mm_node **); -void nvkm_ltc_tags_clear(struct nvkm_ltc *, u32 first, u32 count); +void nvkm_ltc_tags_clear(struct nvkm_device *, u32 first, u32 count); int nvkm_ltc_zbc_color_get(struct nvkm_ltc *, int index, const u32[4]); int nvkm_ltc_zbc_depth_get(struct nvkm_ltc *, int index, const u32); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h index 58f10890c3b6..61c93c86e2e2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MC_H__ #define __NVKM_MC_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index dcd3deff27a4..0760b93e9d1f 
100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -1,68 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MMU_H__ #define __NVKM_MMU_H__ #include <core/subdev.h> -#include <core/mm.h> -struct nvkm_device; -struct nvkm_mem; - -struct nvkm_vm_pgt { - struct nvkm_memory *mem[2]; - u32 refcount[2]; -}; - -struct nvkm_vm_pgd { - struct list_head head; - struct nvkm_gpuobj *obj; -}; struct nvkm_vma { struct list_head head; - int refcount; - struct nvkm_vm *vm; - struct nvkm_mm_node *node; - u64 offset; - u32 access; + struct rb_node tree; + u64 addr; + u64 size:50; + bool mapref:1; /* PTs (de)referenced on (un)map (vs pre-allocated). */ + bool sparse:1; /* Unmapped PDEs/PTEs will not trigger MMU faults. */ +#define NVKM_VMA_PAGE_NONE 7 + u8 page:3; /* Requested page type (index, or NONE for automatic). */ + u8 refd:3; /* Current page type (index, or NONE for unreferenced). */ + bool used:1; /* Region allocated. */ + bool part:1; /* Region was split from an allocated region by map(). */ + bool user:1; /* Region user-allocated. */ + bool busy:1; /* Region busy (for temporarily preventing user access). */ + struct nvkm_memory *memory; /* Memory currently mapped into VMA. */ + struct nvkm_tags *tags; /* Compression tag reference. */ }; -struct nvkm_vm { +struct nvkm_vmm { + const struct nvkm_vmm_func *func; struct nvkm_mmu *mmu; - + const char *name; + u32 debug; + struct kref kref; struct mutex mutex; - struct nvkm_mm mm; - struct kref refcount; - struct list_head pgd_list; + u64 start; + u64 limit; + + struct nvkm_vmm_pt *pd; + struct list_head join; + + struct list_head list; + struct rb_root free; + struct rb_root root; + + bool bootstrapped; atomic_t engref[NVKM_SUBDEV_NR]; - struct nvkm_vm_pgt *pgt; - u32 fpde; - u32 lpde; + dma_addr_t null; + void *nullp; }; -int nvkm_vm_new(struct nvkm_device *, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *, struct nvkm_vm **); -int nvkm_vm_ref(struct nvkm_vm *, struct nvkm_vm **, struct nvkm_gpuobj *pgd); -int nvkm_vm_boot(struct nvkm_vm *, u64 size); -int nvkm_vm_get(struct nvkm_vm *, u64 size, u32 page_shift, u32 access, - struct nvkm_vma *); -void nvkm_vm_put(struct nvkm_vma *); -void nvkm_vm_map(struct nvkm_vma *, struct nvkm_mem *); -void nvkm_vm_map_at(struct nvkm_vma *, u64 offset, struct nvkm_mem *); -void nvkm_vm_unmap(struct nvkm_vma *); -void nvkm_vm_unmap_at(struct nvkm_vma *, u64 offset, u64 length); +int nvkm_vmm_new(struct nvkm_device *, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *, const char *name, struct nvkm_vmm **); +struct nvkm_vmm *nvkm_vmm_ref(struct nvkm_vmm *); +void nvkm_vmm_unref(struct nvkm_vmm **); +int nvkm_vmm_boot(struct nvkm_vmm *); +int nvkm_vmm_join(struct nvkm_vmm *, struct nvkm_memory *inst); +void nvkm_vmm_part(struct nvkm_vmm *, struct nvkm_memory *inst); +int nvkm_vmm_get(struct nvkm_vmm *, u8 page, u64 size, struct nvkm_vma **); +void nvkm_vmm_put(struct nvkm_vmm *, struct nvkm_vma **); + +struct nvkm_vmm_map { + struct nvkm_memory *memory; + u64 offset; + + struct nvkm_mm_node *mem; + struct scatterlist *sgl; + dma_addr_t *dma; + u64 off; + + const struct nvkm_vmm_page *page; + + struct nvkm_tags *tags; + u64 next; + u64 type; + u64 ctag; +}; + +int nvkm_vmm_map(struct nvkm_vmm *, struct nvkm_vma *, void *argv, u32 argc, + struct nvkm_vmm_map *); +void nvkm_vmm_unmap(struct nvkm_vmm *, struct nvkm_vma *); + +struct nvkm_memory *nvkm_umem_search(struct nvkm_client *, u64); +struct nvkm_vmm 
*nvkm_uvmm_search(struct nvkm_client *, u64 handle); struct nvkm_mmu { const struct nvkm_mmu_func *func; struct nvkm_subdev subdev; - u64 limit; u8 dma_bits; - u8 lpg_shift; + + int heap_nr; + struct { +#define NVKM_MEM_VRAM 0x01 +#define NVKM_MEM_HOST 0x02 +#define NVKM_MEM_COMP 0x04 +#define NVKM_MEM_DISP 0x08 + u8 type; + u64 size; + } heap[4]; + + int type_nr; + struct { +#define NVKM_MEM_KIND 0x10 +#define NVKM_MEM_MAPPABLE 0x20 +#define NVKM_MEM_COHERENT 0x40 +#define NVKM_MEM_UNCACHED 0x80 + u8 type; + u8 heap; + } type[16]; + + struct nvkm_vmm *vmm; + + struct { + struct mutex mutex; + struct list_head list; + } ptc, ptp; + + struct nvkm_device_oclass user; }; int nv04_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv41_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv44_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv50_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int g84_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gf100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gk104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gk20a_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gm200_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gm20b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gp100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gp10b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mxm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mxm.h index ed0250139dae..0fd6d6f8eada 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mxm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mxm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MXM_H__ #define __NVKM_MXM_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h index ac2a695963c1..23803cc859fd 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PCI_H__ #define __NVKM_PCI_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h index e7f04732a425..4bc9384046c6 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PMU_H__ #define __NVKM_PMU_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h index 1bfd93b85575..b1ac47eb786e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_THERM_H__ #define __NVKM_THERM_H__ #include <core/subdev.h> @@ -97,4 +98,5 @@ int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm200_therm_new(struct nvkm_device *, int, struct nvkm_therm **); +int gp100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h index ff0709652f80..e9b0746826ca 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_TIMER_H__ #define __NVKM_TIMER_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h index d23209b62c25..f7d3eb647e2e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_TOP_H__ #define __NVKM_TOP_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/vga.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/vga.h index ce5636fe2a66..312933ad7c2b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/vga.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/vga.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_VGA_H__ #define __NOUVEAU_VGA_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h index 08ef9983c643..8a0f85f5fc1a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_VOLT_H__ #define __NVKM_VOLT_H__ #include <core/subdev.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index f98f800cc011..ece650a0c5f9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -34,6 +34,7 @@ #include "nouveau_gem.h" #include "nouveau_chan.h" #include "nouveau_abi16.h" +#include "nouveau_vmm.h" static struct nouveau_abi16 * nouveau_abi16(struct drm_file *file_priv) @@ -134,7 +135,7 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, } if (chan->ntfy) { - nouveau_bo_vma_del(chan->ntfy, &chan->ntfy_vma); + nouveau_vma_del(&chan->ntfy_vma); nouveau_bo_unpin(chan->ntfy); drm_gem_object_unreference_unlocked(&chan->ntfy->gem); } @@ -184,29 +185,33 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = device->info.chipset; break; case NOUVEAU_GETPARAM_PCI_VENDOR: - if (nvxx_device(device)->func->pci) + if (device->info.platform != NV_DEVICE_INFO_V0_SOC) getparam->value = dev->pdev->vendor; else getparam->value = 0; break; case NOUVEAU_GETPARAM_PCI_DEVICE: - if (nvxx_device(device)->func->pci) + if (device->info.platform != NV_DEVICE_INFO_V0_SOC) getparam->value = dev->pdev->device; else getparam->value = 0; break; case NOUVEAU_GETPARAM_BUS_TYPE: - if (!nvxx_device(device)->func->pci) - getparam->value = 3; - else - if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP)) - getparam->value = 0; - else - if (!pci_is_pcie(dev->pdev)) - getparam->value = 1; - else - getparam->value = 2; - break; + switch (device->info.platform) { + case NV_DEVICE_INFO_V0_AGP : getparam->value = 0; break; + case NV_DEVICE_INFO_V0_PCI : getparam->value = 1; break; + case NV_DEVICE_INFO_V0_PCIE: getparam->value = 2; break; + case NV_DEVICE_INFO_V0_SOC : getparam->value = 3; break; + case NV_DEVICE_INFO_V0_IGP : + if (!pci_is_pcie(dev->pdev)) + getparam->value = 1; + else + getparam->value = 2; + break; + default: + WARN_ON(1); + break; + } case NOUVEAU_GETPARAM_FB_SIZE: getparam->value = drm->gem.vram_available; break; @@ -329,8 +334,7 @@ 
nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) goto done; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_bo_vma_add(chan->ntfy, cli->vm, - &chan->ntfy_vma); + ret = nouveau_vma_new(chan->ntfy, &cli->vmm, &chan->ntfy_vma); if (ret) goto done; } @@ -340,7 +344,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nvkm_mm_init(&chan->heap, 0, PAGE_SIZE, 1); + ret = nvkm_mm_init(&chan->heap, 0, 0, PAGE_SIZE, 1); done: if (ret) nouveau_abi16_chan_fini(abi16, chan); @@ -548,8 +552,8 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS) if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; - args.start += chan->ntfy_vma.offset; - args.limit += chan->ntfy_vma.offset; + args.start += chan->ntfy_vma->addr; + args.limit += chan->ntfy_vma->addr; } else if (drm->agp.bridge) { args.target = NV_DMA_V0_TARGET_AGP; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 841cc556fad8..36fde1ff3ad5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_ABI16_H__ #define __NOUVEAU_ABI16_H__ @@ -23,7 +24,7 @@ struct nouveau_abi16_chan { struct nouveau_channel *chan; struct list_head notifiers; struct nouveau_bo *ntfy; - struct nvkm_vma ntfy_vma; + struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 7459ef9943ec..5ffcb6683776 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/pci.h> #include <linux/acpi.h> #include <linux/slab.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.h b/drivers/gpu/drm/nouveau/nouveau_acpi.h index 2f03653aff86..b86294fc99e8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.h +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_ACPI_H__ #define __NOUVEAU_ACPI_H__ diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index dd6fba55ad5d..66bf2aff4a3e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -1478,9 +1478,13 @@ parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb, case 1: entry->dpconf.link_bw = 270000; break; - default: + case 2: entry->dpconf.link_bw = 540000; break; + case 3: + default: + entry->dpconf.link_bw = 810000; + break; } switch ((conf & 0x0f000000) >> 24) { case 0xf: @@ -1964,7 +1968,7 @@ static int load_nv17_hw_sequencer_ucode(struct drm_device *dev, * The microcode entries are found by the "HWSQ" signature. 
*/ - const uint8_t hwsq_signature[] = { 'H', 'W', 'S', 'Q' }; + static const uint8_t hwsq_signature[] = { 'H', 'W', 'S', 'Q' }; const int sz = sizeof(hwsq_signature); int hwsq_offset; @@ -1980,7 +1984,7 @@ uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); struct nvbios *bios = &drm->vbios; - const uint8_t edid_sig[] = { + static const uint8_t edid_sig[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }; uint16_t offset = 0; uint16_t newoffset; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index e427f80344c4..2615912430cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -37,6 +37,12 @@ #include "nouveau_bo.h" #include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "nouveau_mem.h" +#include "nouveau_vmm.h" + +#include <nvif/class.h> +#include <nvif/if500b.h> +#include <nvif/if900b.h> /* * NV10-NV40 tiling helpers @@ -48,8 +54,7 @@ nv10_bo_update_tile_region(struct drm_device *dev, struct nouveau_drm_tile *reg, { struct nouveau_drm *drm = nouveau_drm(dev); int i = reg - drm->tile.reg; - struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_fb *fb = device->fb; + struct nvkm_fb *fb = nvxx_fb(&drm->client.device); struct nvkm_fb_tile *tile = &fb->tile.region[i]; nouveau_fence_unref(®->fence); @@ -97,7 +102,7 @@ nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile, static struct nouveau_drm_tile * nv10_bo_set_tiling(struct drm_device *dev, u32 addr, - u32 size, u32 pitch, u32 flags) + u32 size, u32 pitch, u32 zeta) { struct nouveau_drm *drm = nouveau_drm(dev); struct nvkm_fb *fb = nvxx_fb(&drm->client.device); @@ -120,8 +125,7 @@ nv10_bo_set_tiling(struct drm_device *dev, u32 addr, } if (found) - nv10_bo_update_tile_region(dev, found, addr, size, - pitch, flags); + nv10_bo_update_tile_region(dev, found, addr, size, pitch, zeta); return found; } @@ -155,27 +159,27 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags, struct nvif_device *device = &drm->client.device; if (device->info.family < NV_DEVICE_INFO_V0_TESLA) { - if (nvbo->tile_mode) { + if (nvbo->mode) { if (device->info.chipset >= 0x40) { *align = 65536; - *size = roundup_64(*size, 64 * nvbo->tile_mode); + *size = roundup_64(*size, 64 * nvbo->mode); } else if (device->info.chipset >= 0x30) { *align = 32768; - *size = roundup_64(*size, 64 * nvbo->tile_mode); + *size = roundup_64(*size, 64 * nvbo->mode); } else if (device->info.chipset >= 0x20) { *align = 16384; - *size = roundup_64(*size, 64 * nvbo->tile_mode); + *size = roundup_64(*size, 64 * nvbo->mode); } else if (device->info.chipset >= 0x10) { *align = 16384; - *size = roundup_64(*size, 32 * nvbo->tile_mode); + *size = roundup_64(*size, 32 * nvbo->mode); } } } else { - *size = roundup_64(*size, (1 << nvbo->page_shift)); - *align = max((1 << nvbo->page_shift), *align); + *size = roundup_64(*size, (1 << nvbo->page)); + *align = max((1 << nvbo->page), *align); } *size = roundup_64(*size, PAGE_SIZE); @@ -187,11 +191,13 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, struct sg_table *sg, struct reservation_object *robj, struct nouveau_bo **pnvbo) { - struct nouveau_drm *drm = nouveau_drm(cli->dev); + struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; + struct nvif_mmu *mmu = &cli->mmu; + struct nvif_vmm *vmm = &cli->vmm.vmm; size_t acc_size; - int ret; int type = ttm_bo_type_device; + int ret, i, pi = -1; if (!size) { NV_WARN(drm, "skipped size %016llx\n", 
size); @@ -207,19 +213,80 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, INIT_LIST_HEAD(&nvbo->head); INIT_LIST_HEAD(&nvbo->entry); INIT_LIST_HEAD(&nvbo->vma_list); - nvbo->tile_mode = tile_mode; - nvbo->tile_flags = tile_flags; nvbo->bo.bdev = &drm->ttm.bdev; nvbo->cli = cli; - if (!nvxx_device(&drm->client.device)->func->cpu_coherent) - nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; + /* This is confusing, and doesn't actually mean we want an uncached + * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated + * into in nouveau_gem_new(). + */ + if (flags & TTM_PL_FLAG_UNCACHED) { + /* Determine if we can get a cache-coherent map, forcing + * uncached mapping if we can't. + */ + if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + nvbo->force_coherent = true; + } + + if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) { + nvbo->kind = (tile_flags & 0x0000ff00) >> 8; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return -EINVAL; + } + + nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind; + } else + if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) { + nvbo->kind = (tile_flags & 0x00007f00) >> 8; + nvbo->comp = (tile_flags & 0x00030000) >> 16; + if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) { + kfree(nvbo); + return -EINVAL; + } + } else { + nvbo->zeta = (tile_flags & 0x00000007); + } + nvbo->mode = tile_mode; + nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG); + + /* Determine the desirable target GPU page size for the buffer. */ + for (i = 0; i < vmm->page_nr; i++) { + /* Because we cannot currently allow VMM maps to fail + * during buffer migration, we need to determine page + * size for the buffer up-front, and pre-allocate its + * page tables. + * + * Skip page sizes that can't support needed domains. + */ + if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && + (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) + continue; + if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) + continue; + + /* Select this page size if it's the first that supports + * the potential memory domains, or when it's compatible + * with the requested compression settings. + */ + if (pi < 0 || !nvbo->comp || vmm->page[i].comp) + pi = i; + + /* Stop once the buffer is larger than the current page size. */ + if (size >= 1ULL << vmm->page[i].shift) + break; + } + + if (WARN_ON(pi < 0)) + return -EINVAL; - nvbo->page_shift = 12; - if (drm->client.vm) { - if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024) - nvbo->page_shift = drm->client.vm->mmu->lpg_shift; + /* Disable compression if suitable settings couldn't be found. */ + if (nvbo->comp && !vmm->page[pi].comp) { + if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100) + nvbo->kind = mmu->kind[nvbo->kind]; + nvbo->comp = 0; } + nvbo->page = vmm->page[pi].shift; nouveau_bo_fixup_align(nvbo, flags, &align, &size); nvbo->bo.mem.num_pages = size >> PAGE_SHIFT; @@ -262,7 +329,7 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) unsigned i, fpfn, lpfn; if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS && - nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) && + nvbo->mode && (type & TTM_PL_FLAG_VRAM) && nvbo->bo.mem.num_pages < vram_pages / 4) { /* * Make sure that the color and depth buffers are handled @@ -270,7 +337,7 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) * speed up when alpha-blending and depth-test are enabled * at the same time. 
*/ - if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) { + if (nvbo->zeta) { fpfn = vram_pages / 2; lpfn = ~0; } else { @@ -321,14 +388,10 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig) if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && memtype == TTM_PL_FLAG_VRAM && contig) { - if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) { - if (bo->mem.mem_type == TTM_PL_VRAM) { - struct nvkm_mem *mem = bo->mem.mm_node; - if (!nvkm_mm_contiguous(mem->mem)) - evict = true; - } - nvbo->tile_flags &= ~NOUVEAU_GEM_TILE_NONCONTIG; + if (!nvbo->contig) { + nvbo->contig = true; force = true; + evict = true; } } @@ -376,7 +439,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig) out: if (force && ret) - nvbo->tile_flags |= NOUVEAU_GEM_TILE_NONCONTIG; + nvbo->contig = false; ttm_bo_unreserve(bo); return ret; } @@ -446,7 +509,6 @@ void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nvkm_device *device = nvxx_device(&drm->client.device); struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm; int i; @@ -458,7 +520,8 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) return; for (i = 0; i < ttm_dma->ttm.num_pages; i++) - dma_sync_single_for_device(device->dev, ttm_dma->dma_address[i], + dma_sync_single_for_device(drm->dev->dev, + ttm_dma->dma_address[i], PAGE_SIZE, DMA_TO_DEVICE); } @@ -466,7 +529,6 @@ void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nvkm_device *device = nvxx_device(&drm->client.device); struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm; int i; @@ -478,7 +540,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) return; for (i = 0; i < ttm_dma->ttm.num_pages; i++) - dma_sync_single_for_cpu(device->dev, ttm_dma->dma_address[i], + dma_sync_single_for_cpu(drm->dev->dev, ttm_dma->dma_address[i], PAGE_SIZE, DMA_FROM_DEVICE); } @@ -568,6 +630,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, struct ttm_mem_type_manager *man) { struct nouveau_drm *drm = nouveau_bdev(bdev); + struct nvif_mmu *mmu = &drm->client.mmu; switch (type) { case TTM_PL_SYSTEM: @@ -584,7 +647,8 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { /* Some BARs do not support being ioremapped WC */ - if (nvxx_bar(&drm->client.device)->iomap_uncached) { + const u8 type = mmu->type[drm->ttm.type_vram].type; + if (type & NVIF_MEM_UNCACHED) { man->available_caching = TTM_PL_FLAG_UNCACHED; man->default_caching = TTM_PL_FLAG_UNCACHED; } @@ -659,14 +723,14 @@ static int nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); int ret = RING_SPACE(chan, 10); if (ret == 0) { BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8); - OUT_RING (chan, upper_32_bits(mem->vma[0].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[0].offset)); - OUT_RING (chan, upper_32_bits(mem->vma[1].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[1].offset)); + OUT_RING (chan, upper_32_bits(mem->vma[0].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[0].addr)); + OUT_RING (chan, upper_32_bits(mem->vma[1].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[1].addr)); OUT_RING (chan, PAGE_SIZE); OUT_RING (chan, PAGE_SIZE); OUT_RING (chan, PAGE_SIZE); @@ -691,9 +755,9 @@ static 
int nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; + struct nouveau_mem *mem = nouveau_mem(old_reg); + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; u32 page_count = new_reg->num_pages; int ret; @@ -729,9 +793,9 @@ static int nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; + struct nouveau_mem *mem = nouveau_mem(old_reg); + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; u32 page_count = new_reg->num_pages; int ret; @@ -768,9 +832,9 @@ static int nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; + struct nouveau_mem *mem = nouveau_mem(old_reg); + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; u32 page_count = new_reg->num_pages; int ret; @@ -806,14 +870,14 @@ static int nv98_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); int ret = RING_SPACE(chan, 7); if (ret == 0) { BEGIN_NV04(chan, NvSubCopy, 0x0320, 6); - OUT_RING (chan, upper_32_bits(mem->vma[0].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[0].offset)); - OUT_RING (chan, upper_32_bits(mem->vma[1].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[1].offset)); + OUT_RING (chan, upper_32_bits(mem->vma[0].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[0].addr)); + OUT_RING (chan, upper_32_bits(mem->vma[1].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[1].addr)); OUT_RING (chan, 0x00000000 /* COPY */); OUT_RING (chan, new_reg->num_pages << PAGE_SHIFT); } @@ -824,15 +888,15 @@ static int nv84_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); int ret = RING_SPACE(chan, 7); if (ret == 0) { BEGIN_NV04(chan, NvSubCopy, 0x0304, 6); OUT_RING (chan, new_reg->num_pages << PAGE_SHIFT); - OUT_RING (chan, upper_32_bits(mem->vma[0].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[0].offset)); - OUT_RING (chan, upper_32_bits(mem->vma[1].offset)); - OUT_RING (chan, lower_32_bits(mem->vma[1].offset)); + OUT_RING (chan, upper_32_bits(mem->vma[0].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[0].addr)); + OUT_RING (chan, upper_32_bits(mem->vma[1].addr)); + OUT_RING (chan, lower_32_bits(mem->vma[1].addr)); OUT_RING (chan, 0x00000000 /* MODE_COPY, QUERY_NONE */); } return ret; @@ -858,12 +922,12 @@ static int nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg) { - struct nvkm_mem *mem = old_reg->mm_node; + struct nouveau_mem *mem = nouveau_mem(old_reg); u64 length = (new_reg->num_pages << PAGE_SHIFT); - u64 src_offset = mem->vma[0].offset; - u64 dst_offset = mem->vma[1].offset; - int src_tiled = !!mem->memtype; - int 
dst_tiled = !!((struct nvkm_mem *)new_reg->mm_node)->memtype; + u64 src_offset = mem->vma[0].addr; + u64 dst_offset = mem->vma[1].addr; + int src_tiled = !!mem->kind; + int dst_tiled = !!nouveau_mem(new_reg)->kind; int ret; while (length) { @@ -1000,25 +1064,31 @@ static int nouveau_bo_move_prep(struct nouveau_drm *drm, struct ttm_buffer_object *bo, struct ttm_mem_reg *reg) { - struct nvkm_mem *old_mem = bo->mem.mm_node; - struct nvkm_mem *new_mem = reg->mm_node; - u64 size = (u64)reg->num_pages << PAGE_SHIFT; + struct nouveau_mem *old_mem = nouveau_mem(&bo->mem); + struct nouveau_mem *new_mem = nouveau_mem(reg); + struct nvif_vmm *vmm = &drm->client.vmm.vmm; int ret; - ret = nvkm_vm_get(drm->client.vm, size, old_mem->page_shift, - NV_MEM_ACCESS_RW, &old_mem->vma[0]); + ret = nvif_vmm_get(vmm, LAZY, false, old_mem->mem.page, 0, + old_mem->mem.size, &old_mem->vma[0]); if (ret) return ret; - ret = nvkm_vm_get(drm->client.vm, size, new_mem->page_shift, - NV_MEM_ACCESS_RW, &old_mem->vma[1]); + ret = nvif_vmm_get(vmm, LAZY, false, new_mem->mem.page, 0, + new_mem->mem.size, &old_mem->vma[1]); + if (ret) + goto done; + + ret = nouveau_mem_map(old_mem, vmm, &old_mem->vma[0]); + if (ret) + goto done; + + ret = nouveau_mem_map(new_mem, vmm, &old_mem->vma[1]); +done: if (ret) { - nvkm_vm_put(&old_mem->vma[0]); - return ret; + nvif_vmm_put(vmm, &old_mem->vma[1]); + nvif_vmm_put(vmm, &old_mem->vma[0]); } - - nvkm_vm_map(&old_mem->vma[0], old_mem); - nvkm_vm_map(&old_mem->vma[1], new_mem); return 0; } @@ -1200,21 +1270,23 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_reg) { + struct nouveau_mem *mem = new_reg ? nouveau_mem(new_reg) : NULL; struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nvkm_vma *vma; + struct nouveau_vma *vma; /* ttm can now (stupidly) pass the driver bos it didn't create... 
*/ if (bo->destroy != nouveau_bo_del_ttm) return; - list_for_each_entry(vma, &nvbo->vma_list, head) { - if (new_reg && new_reg->mem_type != TTM_PL_SYSTEM && - (new_reg->mem_type == TTM_PL_VRAM || - nvbo->page_shift != vma->vm->mmu->lpg_shift)) { - nvkm_vm_map(vma, new_reg->mm_node); - } else { + if (mem && new_reg->mem_type != TTM_PL_SYSTEM && + mem->mem.page == nvbo->page) { + list_for_each_entry(vma, &nvbo->vma_list, head) { + nouveau_vma_map(vma, mem); + } + } else { + list_for_each_entry(vma, &nvbo->vma_list, head) { WARN_ON(ttm_bo_wait(bo, false, false)); - nvkm_vm_unmap(vma); + nouveau_vma_unmap(vma); } } } @@ -1234,8 +1306,7 @@ nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_reg, if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) { *new_tile = nv10_bo_set_tiling(dev, offset, new_reg->size, - nvbo->tile_mode, - nvbo->tile_flags); + nvbo->mode, nvbo->zeta); } return 0; @@ -1331,8 +1402,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) struct ttm_mem_type_manager *man = &bdev->man[reg->mem_type]; struct nouveau_drm *drm = nouveau_bdev(bdev); struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_mem *mem = reg->mm_node; - int ret; + struct nouveau_mem *mem = nouveau_mem(reg); reg->bus.addr = NULL; reg->bus.offset = 0; @@ -1353,7 +1423,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) reg->bus.is_iomem = !drm->agp.cma; } #endif - if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA || !mem->memtype) + if (drm->client.mem->oclass < NVIF_CLASS_MEM_NV50 || !mem->kind) /* untiled */ break; /* fallthrough, tiled memory */ @@ -1361,19 +1431,40 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) reg->bus.offset = reg->start << PAGE_SHIFT; reg->bus.base = device->func->resource_addr(device, 1); reg->bus.is_iomem = true; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { - struct nvkm_bar *bar = nvxx_bar(&drm->client.device); - int page_shift = 12; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI) - page_shift = mem->page_shift; + if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) { + union { + struct nv50_mem_map_v0 nv50; + struct gf100_mem_map_v0 gf100; + } args; + u64 handle, length; + u32 argc = 0; + int ret; + + switch (mem->mem.object.oclass) { + case NVIF_CLASS_MEM_NV50: + args.nv50.version = 0; + args.nv50.ro = 0; + args.nv50.kind = mem->kind; + args.nv50.comp = mem->comp; + break; + case NVIF_CLASS_MEM_GF100: + args.gf100.version = 0; + args.gf100.ro = 0; + args.gf100.kind = mem->kind; + break; + default: + WARN_ON(1); + break; + } - ret = nvkm_bar_umap(bar, mem->size << 12, page_shift, - &mem->bar_vma); - if (ret) - return ret; + ret = nvif_object_map_handle(&mem->mem.object, + &argc, argc, + &handle, &length); + if (ret != 1) + return ret ? 
ret : -EINVAL; - nvkm_vm_map(&mem->bar_vma, mem); - reg->bus.offset = mem->bar_vma.offset; + reg->bus.base = 0; + reg->bus.offset = handle; } break; default: @@ -1385,13 +1476,22 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) static void nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) { - struct nvkm_mem *mem = reg->mm_node; - - if (!mem->bar_vma.node) - return; + struct nouveau_drm *drm = nouveau_bdev(bdev); + struct nouveau_mem *mem = nouveau_mem(reg); - nvkm_vm_unmap(&mem->bar_vma); - nvkm_vm_put(&mem->bar_vma); + if (drm->client.mem->oclass >= NVIF_CLASS_MEM_NV50) { + switch (reg->mem_type) { + case TTM_PL_TT: + if (mem->kind) + nvif_object_unmap_handle(&mem->mem.object); + break; + case TTM_PL_VRAM: + nvif_object_unmap_handle(&mem->mem.object); + break; + default: + break; + } + } } static int @@ -1408,7 +1508,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo) */ if (bo->mem.mem_type != TTM_PL_VRAM) { if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA || - !nouveau_bo_tile_layout(nvbo)) + !nvbo->kind) return 0; if (bo->mem.mem_type == TTM_PL_SYSTEM) { @@ -1445,9 +1545,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) { struct ttm_dma_tt *ttm_dma = (void *)ttm; struct nouveau_drm *drm; - struct nvkm_device *device; - struct drm_device *dev; - struct device *pdev; + struct device *dev; unsigned i; int r; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); @@ -1464,9 +1562,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) } drm = nouveau_bdev(ttm->bdev); - device = nvxx_device(&drm->client.device); - dev = drm->dev; - pdev = device->dev; + dev = drm->dev->dev; #if IS_ENABLED(CONFIG_AGP) if (drm->agp.bridge) { @@ -1476,7 +1572,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86) if (swiotlb_nr_tbl()) { - return ttm_dma_populate((void *)ttm, dev->dev); + return ttm_dma_populate((void *)ttm, dev); } #endif @@ -1488,12 +1584,12 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm) for (i = 0; i < ttm->num_pages; i++) { dma_addr_t addr; - addr = dma_map_page(pdev, ttm->pages[i], 0, PAGE_SIZE, + addr = dma_map_page(dev, ttm->pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(pdev, addr)) { + if (dma_mapping_error(dev, addr)) { while (i--) { - dma_unmap_page(pdev, ttm_dma->dma_address[i], + dma_unmap_page(dev, ttm_dma->dma_address[i], PAGE_SIZE, DMA_BIDIRECTIONAL); ttm_dma->dma_address[i] = 0; } @@ -1511,9 +1607,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) { struct ttm_dma_tt *ttm_dma = (void *)ttm; struct nouveau_drm *drm; - struct nvkm_device *device; - struct drm_device *dev; - struct device *pdev; + struct device *dev; unsigned i; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); @@ -1521,9 +1615,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) return; drm = nouveau_bdev(ttm->bdev); - device = nvxx_device(&drm->client.device); - dev = drm->dev; - pdev = device->dev; + dev = drm->dev->dev; #if IS_ENABLED(CONFIG_AGP) if (drm->agp.bridge) { @@ -1534,14 +1626,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86) if (swiotlb_nr_tbl()) { - ttm_dma_unpopulate((void *)ttm, dev->dev); + ttm_dma_unpopulate((void *)ttm, dev); return; } #endif for (i = 0; i < ttm->num_pages; i++) { if (ttm_dma->dma_address[i]) { - dma_unmap_page(pdev, ttm_dma->dma_address[i], PAGE_SIZE, + dma_unmap_page(dev, ttm_dma->dma_address[i], PAGE_SIZE, DMA_BIDIRECTIONAL); } } @@ -1576,48 +1668,3 @@ struct 
ttm_bo_driver nouveau_bo_driver = { .io_mem_free = &nouveau_ttm_io_mem_free, .io_mem_pfn = ttm_bo_default_io_mem_pfn, }; - -struct nvkm_vma * -nouveau_bo_vma_find(struct nouveau_bo *nvbo, struct nvkm_vm *vm) -{ - struct nvkm_vma *vma; - list_for_each_entry(vma, &nvbo->vma_list, head) { - if (vma->vm == vm) - return vma; - } - - return NULL; -} - -int -nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nvkm_vm *vm, - struct nvkm_vma *vma) -{ - const u32 size = nvbo->bo.mem.num_pages << PAGE_SHIFT; - int ret; - - ret = nvkm_vm_get(vm, size, nvbo->page_shift, - NV_MEM_ACCESS_RW, vma); - if (ret) - return ret; - - if ( nvbo->bo.mem.mem_type != TTM_PL_SYSTEM && - (nvbo->bo.mem.mem_type == TTM_PL_VRAM || - nvbo->page_shift != vma->vm->mmu->lpg_shift)) - nvkm_vm_map(vma, nvbo->bo.mem.mm_node); - - list_add_tail(&vma->head, &nvbo->vma_list); - vma->refcount = 1; - return 0; -} - -void -nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nvkm_vma *vma) -{ - if (vma->node) { - if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - list_del(&vma->head); - } -} diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index b06a5385d6dd..7b5cc5c73d20 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_BO_H__ #define __NOUVEAU_BO_H__ @@ -24,12 +25,16 @@ struct nouveau_bo { bool validate_mapped; struct list_head vma_list; - unsigned page_shift; struct nouveau_cli *cli; - u32 tile_mode; - u32 tile_flags; + unsigned contig:1; + unsigned page:5; + unsigned kind:8; + unsigned comp:3; + unsigned zeta:3; + unsigned mode; + struct nouveau_drm_tile *tile; /* Only valid if allocated via nouveau_gem_new() and iff you hold a @@ -89,13 +94,6 @@ int nouveau_bo_validate(struct nouveau_bo *, bool interruptible, void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo); void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo); -struct nvkm_vma * -nouveau_bo_vma_find(struct nouveau_bo *, struct nvkm_vm *); - -int nouveau_bo_vma_add(struct nouveau_bo *, struct nvkm_vm *, - struct nvkm_vma *); -void nouveau_bo_vma_del(struct nouveau_bo *, struct nvkm_vma *); - /* TODO: submit equivalent to TTM generic API upstream? 
*/ static inline void __iomem * nvbo_kmap_obj_iovirtual(struct nouveau_bo *nvbo) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index dbc41fa86ee8..af1116655910 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -40,6 +40,7 @@ #include "nouveau_chan.h" #include "nouveau_fence.h" #include "nouveau_abi16.h" +#include "nouveau_vmm.h" MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM"); int nouveau_vram_pushbuf; @@ -83,6 +84,14 @@ nouveau_channel_del(struct nouveau_channel **pchan) { struct nouveau_channel *chan = *pchan; if (chan) { + struct nouveau_cli *cli = (void *)chan->user.client; + bool super; + + if (cli) { + super = cli->base.super; + cli->base.super = true; + } + if (chan->fence) nouveau_fence(chan->drm)->context_del(chan); nvif_object_fini(&chan->nvsw); @@ -91,12 +100,15 @@ nouveau_channel_del(struct nouveau_channel **pchan) nvif_notify_fini(&chan->kill); nvif_object_fini(&chan->user); nvif_object_fini(&chan->push.ctxdma); - nouveau_bo_vma_del(chan->push.buffer, &chan->push.vma); + nouveau_vma_del(&chan->push.vma); nouveau_bo_unmap(chan->push.buffer); if (chan->push.buffer && chan->push.buffer->pin_refcnt) nouveau_bo_unpin(chan->push.buffer); nouveau_bo_ref(NULL, &chan->push.buffer); kfree(chan); + + if (cli) + cli->base.super = super; } *pchan = NULL; } @@ -106,7 +118,6 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, u32 size, struct nouveau_channel **pchan) { struct nouveau_cli *cli = (void *)device->object.client; - struct nvkm_mmu *mmu = nvxx_mmu(device); struct nv_dma_v0 args = {}; struct nouveau_channel *chan; u32 target; @@ -142,11 +153,11 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, * pushbuf lives in, this is because the GEM code requires that * we be able to call out to other (indirect) push buffers */ - chan->push.vma.offset = chan->push.buffer->bo.offset; + chan->push.addr = chan->push.buffer->bo.offset; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_bo_vma_add(chan->push.buffer, cli->vm, - &chan->push.vma); + ret = nouveau_vma_new(chan->push.buffer, &cli->vmm, + &chan->push.vma); if (ret) { nouveau_channel_del(pchan); return ret; @@ -155,7 +166,9 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vm->mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; + + chan->push.addr = chan->push.vma->addr; } else if (chan->push.buffer->bo.mem.mem_type == TTM_PL_VRAM) { if (device->info.family == NV_DEVICE_INFO_V0_TNT) { @@ -185,7 +198,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } } @@ -203,6 +216,7 @@ static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, u32 engine, struct nouveau_channel **pchan) { + struct nouveau_cli *cli = (void *)device->object.client; static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A, MAXWELL_CHANNEL_GPFIFO_A, KEPLER_CHANNEL_GPFIFO_B, @@ -233,22 +247,22 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, args.kepler.version = 0; args.kepler.engines = engine; args.kepler.ilength = 0x02000; - args.kepler.ioffset = 0x10000 + chan->push.vma.offset; - args.kepler.vm = 0; + args.kepler.ioffset 
= 0x10000 + chan->push.addr; + args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.kepler); } else if (oclass[0] >= FERMI_CHANNEL_GPFIFO) { args.fermi.version = 0; args.fermi.ilength = 0x02000; - args.fermi.ioffset = 0x10000 + chan->push.vma.offset; - args.fermi.vm = 0; + args.fermi.ioffset = 0x10000 + chan->push.addr; + args.fermi.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.fermi); } else { args.nv50.version = 0; args.nv50.ilength = 0x02000; - args.nv50.ioffset = 0x10000 + chan->push.vma.offset; + args.nv50.ioffset = 0x10000 + chan->push.addr; args.nv50.pushbuf = nvif_handle(&chan->push.ctxdma); - args.nv50.vm = 0; + args.nv50.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.nv50); } @@ -293,7 +307,7 @@ nouveau_channel_dma(struct nouveau_drm *drm, struct nvif_device *device, /* create channel object */ args.version = 0; args.pushbuf = nvif_handle(&chan->push.ctxdma); - args.offset = chan->push.vma.offset; + args.offset = chan->push.addr; do { ret = nvif_object_init(&device->object, 0, *oclass++, @@ -314,11 +328,10 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) struct nvif_device *device = chan->device; struct nouveau_cli *cli = (void *)chan->user.client; struct nouveau_drm *drm = chan->drm; - struct nvkm_mmu *mmu = nvxx_mmu(device); struct nv_dma_v0 args = {}; int ret, i; - nvif_object_map(&chan->user); + nvif_object_map(&chan->user, NULL, 0); if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) { ret = nvif_notify_init(&chan->user, nouveau_channel_killed, @@ -339,7 +352,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vm->mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } else { args.target = NV_DMA_V0_TARGET_VRAM; args.access = NV_DMA_V0_ACCESS_RDWR; @@ -356,7 +369,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vm->mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } else if (chan->drm->agp.bridge) { args.target = NV_DMA_V0_TARGET_AGP; @@ -368,7 +381,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = mmu->limit - 1; + args.limit = cli->vmm.vmm.limit - 1; } ret = nvif_object_init(&chan->user, gart, NV_DMA_IN_MEMORY, diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index 46b947ba1cf4..14607c16a2bd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_CHAN_H__ #define __NOUVEAU_CHAN_H__ #include <nvif/object.h> @@ -16,8 +17,9 @@ struct nouveau_channel { struct { struct nouveau_bo *buffer; - struct nvkm_vma vma; + struct nouveau_vma *vma; struct nvif_object ctxdma; + u64 addr; } push; /* TODO: this will be reworked in the near future */ diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h index b799f8dfb2b2..1d01a82d4b6f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_DEBUGFS_H__ #define __NOUVEAU_DEBUGFS_H__ diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h 
b/drivers/gpu/drm/nouveau/nouveau_display.h index 201aec2ea5b8..270ba56f2756 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -1,14 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_DISPLAY_H__ #define __NOUVEAU_DISPLAY_H__ - -#include <subdev/mmu.h> - #include "nouveau_drv.h" struct nouveau_framebuffer { struct drm_framebuffer base; struct nouveau_bo *nvbo; - struct nvkm_vma vma; + struct nouveau_vma *vma; u32 r_handle; u32 r_format; u32 r_pitch; diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index 2634a1a79888..10e84f6ca2b7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -26,6 +26,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" +#include "nouveau_vmm.h" void OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords) @@ -71,11 +72,11 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout) return -EBUSY; } - if (val < chan->push.vma.offset || - val > chan->push.vma.offset + (chan->dma.max << 2)) + if (val < chan->push.addr || + val > chan->push.addr + (chan->dma.max << 2)) return -EINVAL; - return (val - chan->push.vma.offset) >> 2; + return (val - chan->push.addr) >> 2; } void @@ -84,13 +85,13 @@ nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, { struct nouveau_cli *cli = (void *)chan->user.client; struct nouveau_bo *pb = chan->push.buffer; - struct nvkm_vma *vma; + struct nouveau_vma *vma; int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; u64 offset; - vma = nouveau_bo_vma_find(bo, cli->vm); + vma = nouveau_vma_find(bo, &cli->vmm); BUG_ON(!vma); - offset = vma->offset + delta; + offset = vma->addr + delta; BUG_ON(chan->dma.ib_free < 1); @@ -224,7 +225,7 @@ nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size) * instruct the GPU to jump back to the start right * after processing the currently pending commands. */ - OUT_RING(chan, chan->push.vma.offset | 0x20000000); + OUT_RING(chan, chan->push.addr | 0x20000000); /* wait for GET to depart from the skips area. 
* prevents writing GET==PUT and causing a race diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index aff3a9d0a1fc..74e10b14a7da 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -140,7 +140,7 @@ BEGIN_IMC0(struct nouveau_channel *chan, int subc, int mthd, u16 data) #define WRITE_PUT(val) do { \ mb(); \ nouveau_bo_rd32(chan->push.buffer, 0); \ - nvif_wr32(&chan->user, chan->user_put, ((val) << 2) + chan->push.vma.offset); \ + nvif_wr32(&chan->user, chan->user_put, ((val) << 2) + chan->push.addr);\ } while (0) static inline void diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 595630d1fb9e..8d4a5be3b913 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -111,33 +111,119 @@ nouveau_name(struct drm_device *dev) return nouveau_platform_name(to_platform_device(dev->dev)); } +static inline bool +nouveau_cli_work_ready(struct dma_fence *fence, bool wait) +{ + if (!dma_fence_is_signaled(fence)) { + if (!wait) + return false; + WARN_ON(dma_fence_wait_timeout(fence, false, 2 * HZ) <= 0); + } + dma_fence_put(fence); + return true; +} + +static void +nouveau_cli_work_flush(struct nouveau_cli *cli, bool wait) +{ + struct nouveau_cli_work *work, *wtmp; + mutex_lock(&cli->lock); + list_for_each_entry_safe(work, wtmp, &cli->worker, head) { + if (!work->fence || nouveau_cli_work_ready(work->fence, wait)) { + list_del(&work->head); + work->func(work); + } + } + mutex_unlock(&cli->lock); +} + +static void +nouveau_cli_work_fence(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct nouveau_cli_work *work = container_of(cb, typeof(*work), cb); + schedule_work(&work->cli->work); +} + +void +nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, + struct nouveau_cli_work *work) +{ + work->fence = dma_fence_get(fence); + work->cli = cli; + mutex_lock(&cli->lock); + list_add_tail(&work->head, &cli->worker); + mutex_unlock(&cli->lock); + if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence)) + nouveau_cli_work_fence(fence, &work->cb); +} + +static void +nouveau_cli_work(struct work_struct *w) +{ + struct nouveau_cli *cli = container_of(w, typeof(*cli), work); + nouveau_cli_work_flush(cli, false); +} + static void nouveau_cli_fini(struct nouveau_cli *cli) { - nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL); + nouveau_cli_work_flush(cli, true); usif_client_fini(cli); + nouveau_vmm_fini(&cli->vmm); + nvif_mmu_fini(&cli->mmu); nvif_device_fini(&cli->device); + mutex_lock(&cli->drm->master.lock); nvif_client_fini(&cli->base); + mutex_unlock(&cli->drm->master.lock); } static int nouveau_cli_init(struct nouveau_drm *drm, const char *sname, struct nouveau_cli *cli) { + static const struct nvif_mclass + mems[] = { + { NVIF_CLASS_MEM_GF100, -1 }, + { NVIF_CLASS_MEM_NV50 , -1 }, + { NVIF_CLASS_MEM_NV04 , -1 }, + {} + }; + static const struct nvif_mclass + mmus[] = { + { NVIF_CLASS_MMU_GF100, -1 }, + { NVIF_CLASS_MMU_NV50 , -1 }, + { NVIF_CLASS_MMU_NV04 , -1 }, + {} + }; + static const struct nvif_mclass + vmms[] = { + { NVIF_CLASS_VMM_GP100, -1 }, + { NVIF_CLASS_VMM_GM200, -1 }, + { NVIF_CLASS_VMM_GF100, -1 }, + { NVIF_CLASS_VMM_NV50 , -1 }, + { NVIF_CLASS_VMM_NV04 , -1 }, + {} + }; u64 device = nouveau_name(drm->dev); int ret; snprintf(cli->name, sizeof(cli->name), "%s", sname); - cli->dev = drm->dev; + cli->drm = drm; mutex_init(&cli->mutex); usif_client_init(cli); - if (cli == &drm->client) { + 
INIT_WORK(&cli->work, nouveau_cli_work); + INIT_LIST_HEAD(&cli->worker); + mutex_init(&cli->lock); + + if (cli == &drm->master) { ret = nvif_driver_init(NULL, nouveau_config, nouveau_debug, cli->name, device, &cli->base); } else { - ret = nvif_client_init(&drm->client.base, cli->name, device, + mutex_lock(&drm->master.lock); + ret = nvif_client_init(&drm->master.base, cli->name, device, &cli->base); + mutex_unlock(&drm->master.lock); } if (ret) { NV_ERROR(drm, "Client allocation failed: %d\n", ret); @@ -154,6 +240,38 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, goto done; } + ret = nvif_mclass(&cli->device.object, mmus); + if (ret < 0) { + NV_ERROR(drm, "No supported MMU class\n"); + goto done; + } + + ret = nvif_mmu_init(&cli->device.object, mmus[ret].oclass, &cli->mmu); + if (ret) { + NV_ERROR(drm, "MMU allocation failed: %d\n", ret); + goto done; + } + + ret = nvif_mclass(&cli->mmu.object, vmms); + if (ret < 0) { + NV_ERROR(drm, "No supported VMM class\n"); + goto done; + } + + ret = nouveau_vmm_init(cli, vmms[ret].oclass, &cli->vmm); + if (ret) { + NV_ERROR(drm, "VMM allocation failed: %d\n", ret); + goto done; + } + + ret = nvif_mclass(&cli->mmu.object, mems); + if (ret < 0) { + NV_ERROR(drm, "No supported MEM class\n"); + goto done; + } + + cli->mem = &mems[ret]; + return 0; done: if (ret) nouveau_cli_fini(cli); @@ -433,6 +551,10 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) dev->dev_private = drm; drm->dev = dev; + ret = nouveau_cli_init(drm, "DRM-master", &drm->master); + if (ret) + return ret; + ret = nouveau_cli_init(drm, "DRM", &drm->client); if (ret) return ret; @@ -456,21 +578,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) nouveau_vga_init(drm); - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { - if (!nvxx_device(&drm->client.device)->mmu) { - ret = -ENOSYS; - goto fail_device; - } - - ret = nvkm_vm_new(nvxx_device(&drm->client.device), - 0, (1ULL << 40), 0x1000, NULL, - &drm->client.vm); - if (ret) - goto fail_device; - - nvxx_client(&drm->client.base)->vm = drm->client.vm; - } - ret = nouveau_ttm_init(drm); if (ret) goto fail_ttm; @@ -516,8 +623,8 @@ fail_bios: nouveau_ttm_fini(drm); fail_ttm: nouveau_vga_fini(drm); -fail_device: nouveau_cli_fini(&drm->client); + nouveau_cli_fini(&drm->master); kfree(drm); return ret; } @@ -550,6 +657,7 @@ nouveau_drm_unload(struct drm_device *dev) if (drm->hdmi_device) pci_dev_put(drm->hdmi_device); nouveau_cli_fini(&drm->client); + nouveau_cli_fini(&drm->master); kfree(drm); } @@ -618,7 +726,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime) } NV_DEBUG(drm, "suspending object tree...\n"); - ret = nvif_client_suspend(&drm->client.base); + ret = nvif_client_suspend(&drm->master.base); if (ret) goto fail_client; @@ -642,7 +750,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime) struct nouveau_drm *drm = nouveau_drm(dev); NV_DEBUG(drm, "resuming object tree...\n"); - nvif_client_resume(&drm->client.base); + nvif_client_resume(&drm->master.base); NV_DEBUG(drm, "resuming fence...\n"); if (drm->fence && nouveau_fence(drm)->resume) @@ -850,15 +958,6 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) cli->base.super = false; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nvkm_vm_new(nvxx_device(&drm->client.device), 0, - (1ULL << 40), 0x1000, NULL, &cli->vm); - if (ret) - goto done; - - nvxx_client(&cli->base)->vm = cli->vm; - } - fpriv->driver_priv = cli; mutex_lock(&drm->client.mutex); diff --git 
a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 822fe1d4d35e..3331e82ae9e7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_DRV_H__ #define __NOUVEAU_DRV_H__ @@ -5,7 +6,7 @@ #define DRIVER_EMAIL "nouveau@lists.freedesktop.org" #define DRIVER_NAME "nouveau" -#define DRIVER_DESC "nVidia Riva/TNT/GeForce/Quadro/Tesla" +#define DRIVER_DESC "nVidia Riva/TNT/GeForce/Quadro/Tesla/Tegra K1+" #define DRIVER_DATE "20120801" #define DRIVER_MAJOR 1 @@ -42,6 +43,8 @@ #include <nvif/client.h> #include <nvif/device.h> #include <nvif/ioctl.h> +#include <nvif/mmu.h> +#include <nvif/vmm.h> #include <drm/drmP.h> @@ -61,6 +64,7 @@ struct platform_device; #include "nouveau_fence.h" #include "nouveau_bios.h" +#include "nouveau_vmm.h" struct nouveau_drm_tile { struct nouveau_fence *fence; @@ -86,19 +90,37 @@ enum nouveau_drm_handle { struct nouveau_cli { struct nvif_client base; - struct drm_device *dev; + struct nouveau_drm *drm; struct mutex mutex; struct nvif_device device; + struct nvif_mmu mmu; + struct nouveau_vmm vmm; + const struct nvif_mclass *mem; - struct nvkm_vm *vm; /*XXX*/ struct list_head head; void *abi16; struct list_head objects; struct list_head notifys; char name[32]; + + struct work_struct work; + struct list_head worker; + struct mutex lock; }; +struct nouveau_cli_work { + void (*func)(struct nouveau_cli_work *); + struct nouveau_cli *cli; + struct list_head head; + + struct dma_fence *fence; + struct dma_fence_cb cb; +}; + +void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *, + struct nouveau_cli_work *); + static inline struct nouveau_cli * nouveau_cli(struct drm_file *fpriv) { @@ -109,6 +131,7 @@ nouveau_cli(struct drm_file *fpriv) #include <nvif/device.h> struct nouveau_drm { + struct nouveau_cli master; struct nouveau_cli client; struct drm_device *dev; @@ -133,6 +156,9 @@ struct nouveau_drm { struct nouveau_channel *chan; struct nvif_object copy; int mtrr; + int type_vram; + int type_host; + int type_ncoh; } ttm; /* GEM interface support */ @@ -204,7 +230,7 @@ void nouveau_drm_device_remove(struct drm_device *dev); #define NV_PRINTK(l,c,f,a...) do { \ struct nouveau_cli *_cli = (c); \ - dev_##l(_cli->dev->dev, "%s: "f, _cli->name, ##a); \ + dev_##l(_cli->drm->dev->dev, "%s: "f, _cli->name, ##a); \ } while(0) #define NV_FATAL(drm,f,a...) NV_PRINTK(crit, &(drm)->client, f, ##a) #define NV_ERROR(drm,f,a...) 
NV_PRINTK(err, &(drm)->client, f, ##a) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index f7707849bb53..c533d8e04afc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -48,6 +48,7 @@ #include "nouveau_bo.h" #include "nouveau_fbcon.h" #include "nouveau_chan.h" +#include "nouveau_vmm.h" #include "nouveau_crtc.h" @@ -223,7 +224,7 @@ void nouveau_fbcon_accel_save_disable(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->saved_flags = drm->fbcon->helper.fbdev->flags; drm->fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; } @@ -233,7 +234,7 @@ void nouveau_fbcon_accel_restore(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->helper.fbdev->flags = drm->fbcon->saved_flags; } } @@ -245,7 +246,8 @@ nouveau_fbcon_accel_fini(struct drm_device *dev) struct nouveau_fbdev *fbcon = drm->fbcon; if (fbcon && drm->channel) { console_lock(); - fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; + if (fbcon->helper.fbdev) + fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; console_unlock(); nouveau_channel_idle(drm->channel); nvif_object_fini(&fbcon->twod); @@ -347,7 +349,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, chan = nouveau_nofbaccel ? NULL : drm->channel; if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_bo_vma_add(nvbo, drm->client.vm, &fb->vma); + ret = nouveau_vma_new(nvbo, &drm->client.vmm, &fb->vma); if (ret) { NV_ERROR(drm, "failed to map fb into chan: %d\n", ret); chan = NULL; @@ -401,7 +403,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, out_unlock: if (chan) - nouveau_bo_vma_del(fb->nvbo, &fb->vma); + nouveau_vma_del(&fb->vma); nouveau_bo_unmap(fb->nvbo); out_unpin: nouveau_bo_unpin(fb->nvbo); @@ -428,7 +430,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) drm_fb_helper_fini(&fbcon->helper); if (nouveau_fb->nvbo) { - nouveau_bo_vma_del(nouveau_fb->nvbo, &nouveau_fb->vma); + nouveau_vma_del(&nouveau_fb->vma); nouveau_bo_unmap(nouveau_fb->nvbo); nouveau_bo_unpin(nouveau_fb->nvbo); drm_framebuffer_unreference(&nouveau_fb->base); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 99e14e3e0fe4..503fa94dc06d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -199,62 +199,6 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha WARN_ON(ret); } -struct nouveau_fence_work { - struct work_struct work; - struct dma_fence_cb cb; - void (*func)(void *); - void *data; -}; - -static void -nouveau_fence_work_handler(struct work_struct *kwork) -{ - struct nouveau_fence_work *work = container_of(kwork, typeof(*work), work); - work->func(work->data); - kfree(work); -} - -static void nouveau_fence_work_cb(struct dma_fence *fence, struct dma_fence_cb *cb) -{ - struct nouveau_fence_work *work = container_of(cb, typeof(*work), cb); - - schedule_work(&work->work); -} - -void -nouveau_fence_work(struct dma_fence *fence, - void (*func)(void *), void *data) -{ - struct nouveau_fence_work *work; - - if (dma_fence_is_signaled(fence)) - goto err; - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - /* - * this might not be a nouveau fence any more, - * so force a lazy wait here - */ - 
WARN_ON(nouveau_fence_wait((struct nouveau_fence *)fence, - true, false)); - goto err; - } - - INIT_WORK(&work->work, nouveau_fence_work_handler); - work->func = func; - work->data = data; - - if (dma_fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0) - goto err_free; - return; - -err_free: - kfree(work); -err: - func(data); -} - int nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) { @@ -474,8 +418,6 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, if (!fence) return -ENOMEM; - fence->sysmem = sysmem; - ret = nouveau_fence_emit(fence, chan); if (ret) nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index d5e58a38f160..5bd8d30d1657 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_FENCE_H__ #define __NOUVEAU_FENCE_H__ @@ -12,8 +13,6 @@ struct nouveau_fence { struct list_head head; - bool sysmem; - struct nouveau_channel __rcu *channel; unsigned long timeout; }; @@ -24,7 +23,6 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); bool nouveau_fence_done(struct nouveau_fence *); -void nouveau_fence_work(struct dma_fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); @@ -90,14 +88,12 @@ int nouveau_flip_complete(struct nvif_notify *); struct nv84_fence_chan { struct nouveau_fence_chan base; - struct nvkm_vma vma; - struct nvkm_vma vma_gart; + struct nouveau_vma *vma; }; struct nv84_fence_priv { struct nouveau_fence_priv base; struct nouveau_bo *bo; - struct nouveau_bo *bo_gart; u32 *suspend; struct mutex mutex; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 2170534101ca..efc89aaef66a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -31,6 +31,10 @@ #include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "nouveau_mem.h" +#include "nouveau_vmm.h" + +#include <nvif/class.h> void nouveau_gem_object_del(struct drm_gem_object *gem) @@ -64,66 +68,61 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nvkm_vma *vma; struct device *dev = drm->dev->dev; + struct nouveau_vma *vma; int ret; - if (!cli->vm) + if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) return 0; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); if (ret) return ret; - vma = nouveau_bo_vma_find(nvbo, cli->vm); - if (!vma) { - vma = kzalloc(sizeof(*vma), GFP_KERNEL); - if (!vma) { - ret = -ENOMEM; - goto out; - } - - ret = pm_runtime_get_sync(dev); - if (ret < 0 && ret != -EACCES) { - kfree(vma); - goto out; - } - - ret = nouveau_bo_vma_add(nvbo, cli->vm, vma); - if (ret) - kfree(vma); - - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } else { - vma->refcount++; - } + ret = pm_runtime_get_sync(dev); + if (ret < 0 && ret != -EACCES) + goto out; + ret = nouveau_vma_new(nvbo, &cli->vmm, &vma); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); out: ttm_bo_unreserve(&nvbo->bo); return ret; } +struct nouveau_gem_object_unmap { 
+ struct nouveau_cli_work work; + struct nouveau_vma *vma; +}; + static void -nouveau_gem_object_delete(void *data) +nouveau_gem_object_delete(struct nouveau_vma *vma) { - struct nvkm_vma *vma = data; - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - kfree(vma); + nouveau_vma_del(&vma); } static void -nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nvkm_vma *vma) +nouveau_gem_object_delete_work(struct nouveau_cli_work *w) +{ + struct nouveau_gem_object_unmap *work = + container_of(w, typeof(*work), work); + nouveau_gem_object_delete(work->vma); + kfree(work); +} + +static void +nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma) { const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM; struct reservation_object *resv = nvbo->bo.resv; struct reservation_object_list *fobj; + struct nouveau_gem_object_unmap *work; struct dma_fence *fence = NULL; fobj = reservation_object_get_list(resv); - list_del(&vma->head); + list_del_init(&vma->head); if (fobj && fobj->shared_count > 1) ttm_bo_wait(&nvbo->bo, false, false); @@ -133,14 +132,20 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nvkm_vma *vma) else fence = reservation_object_get_excl(nvbo->bo.resv); - if (fence && mapped) { - nouveau_fence_work(fence, nouveau_gem_object_delete, vma); - } else { - if (mapped) - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - kfree(vma); + if (!fence || !mapped) { + nouveau_gem_object_delete(vma); + return; + } + + if (!(work = kmalloc(sizeof(*work), GFP_KERNEL))) { + WARN_ON(dma_fence_wait_timeout(fence, false, 2 * HZ) <= 0); + nouveau_gem_object_delete(vma); + return; } + + work->work.func = nouveau_gem_object_delete_work; + work->vma = vma; + nouveau_cli_work_queue(vma->vmm->cli, fence, &work->work); } void @@ -150,19 +155,19 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; - struct nvkm_vma *vma; + struct nouveau_vma *vma; int ret; - if (!cli->vm) + if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) return; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); if (ret) return; - vma = nouveau_bo_vma_find(nvbo, cli->vm); + vma = nouveau_vma_find(nvbo, &cli->vmm); if (vma) { - if (--vma->refcount == 0) { + if (--vma->refs == 0) { ret = pm_runtime_get_sync(dev); if (!WARN_ON(ret < 0 && ret != -EACCES)) { nouveau_gem_object_unmap(nvbo, vma); @@ -179,7 +184,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, uint32_t tile_mode, uint32_t tile_flags, struct nouveau_bo **pnvbo) { - struct nouveau_drm *drm = nouveau_drm(cli->dev); + struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; u32 flags = 0; int ret; @@ -227,7 +232,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); - struct nvkm_vma *vma; + struct nouveau_vma *vma; if (is_power_of_2(nvbo->valid_domains)) rep->domain = nvbo->valid_domains; @@ -236,18 +241,25 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; rep->offset = nvbo->bo.offset; - if (cli->vm) { - vma = nouveau_bo_vma_find(nvbo, cli->vm); + if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + vma = nouveau_vma_find(nvbo, &cli->vmm); if (!vma) return -EINVAL; - rep->offset = vma->offset; + rep->offset = vma->addr; } rep->size = nvbo->bo.mem.num_pages 
<< PAGE_SHIFT; rep->map_handle = drm_vma_node_offset_addr(&nvbo->bo.vma_node); - rep->tile_mode = nvbo->tile_mode; - rep->tile_flags = nvbo->tile_flags; + rep->tile_mode = nvbo->mode; + rep->tile_flags = nvbo->contig ? 0 : NOUVEAU_GEM_TILE_NONCONTIG; + if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) + rep->tile_flags |= nvbo->kind << 8; + else + if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) + rep->tile_flags |= nvbo->kind << 8 | nvbo->comp << 16; + else + rep->tile_flags |= nvbo->zeta; return 0; } @@ -255,18 +267,11 @@ int nouveau_gem_ioctl_new(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_cli *cli = nouveau_cli(file_priv); - struct nvkm_fb *fb = nvxx_fb(&drm->client.device); struct drm_nouveau_gem_new *req = data; struct nouveau_bo *nvbo = NULL; int ret = 0; - if (!nvkm_fb_memtype_valid(fb, req->info.tile_flags)) { - NV_PRINTK(err, cli, "bad page flags: 0x%08x\n", req->info.tile_flags); - return -EINVAL; - } - ret = nouveau_gem_new(cli, req->info.size, req->align, req->info.domain, req->info.tile_mode, req->info.tile_flags, &nvbo); @@ -791,7 +796,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, bo[push[i].bo_index].user_priv; uint32_t cmd; - cmd = chan->push.vma.offset + ((chan->dma.cur + 2) << 2); + cmd = chan->push.addr + ((chan->dma.cur + 2) << 2); cmd |= 0x20000000; if (unlikely(cmd != req->suffix0)) { if (!nvbo->kmap.virtual) { @@ -843,7 +848,7 @@ out_next: req->suffix1 = 0x00000000; } else { req->suffix0 = 0x20000000 | - (chan->push.vma.offset + ((chan->dma.cur + 2) << 2)); + (chan->push.addr + ((chan->dma.cur + 2) << 2)); req->suffix1 = 0x00000000; } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h index 8fa6ed9ddd3a..fe39998f65cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.h +++ b/drivers/gpu/drm/nouveau/nouveau_gem.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_GEM_H__ #define __NOUVEAU_GEM_H__ @@ -6,9 +7,6 @@ #include "nouveau_drv.h" #include "nouveau_bo.h" -#define nouveau_bo_tile_layout(nvbo) \ - ((nvbo)->tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK) - static inline struct nouveau_bo * nouveau_gem_object(struct drm_gem_object *gem) { diff --git a/drivers/gpu/drm/nouveau/nouveau_ioctl.h b/drivers/gpu/drm/nouveau/nouveau_ioctl.h index 3b9f2e5463a7..380ede26806c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ioctl.h +++ b/drivers/gpu/drm/nouveau/nouveau_ioctl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_IOCTL_H__ #define __NOUVEAU_IOCTL_H__ diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c new file mode 100644 index 000000000000..589a9621db76 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -0,0 +1,198 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nouveau_mem.h" +#include "nouveau_drv.h" +#include "nouveau_bo.h" + +#include <drm/ttm/ttm_bo_driver.h> + +#include <nvif/class.h> +#include <nvif/if000a.h> +#include <nvif/if500b.h> +#include <nvif/if500d.h> +#include <nvif/if900b.h> +#include <nvif/if900d.h> + +int +nouveau_mem_map(struct nouveau_mem *mem, + struct nvif_vmm *vmm, struct nvif_vma *vma) +{ + union { + struct nv50_vmm_map_v0 nv50; + struct gf100_vmm_map_v0 gf100; + } args; + u32 argc = 0; + bool super; + int ret; + + switch (vmm->object.oclass) { + case NVIF_CLASS_VMM_NV04: + break; + case NVIF_CLASS_VMM_NV50: + args.nv50.version = 0; + args.nv50.ro = 0; + args.nv50.priv = 0; + args.nv50.kind = mem->kind; + args.nv50.comp = mem->comp; + argc = sizeof(args.nv50); + break; + case NVIF_CLASS_VMM_GF100: + case NVIF_CLASS_VMM_GM200: + case NVIF_CLASS_VMM_GP100: + args.gf100.version = 0; + if (mem->mem.type & NVIF_MEM_VRAM) + args.gf100.vol = 0; + else + args.gf100.vol = 1; + args.gf100.ro = 0; + args.gf100.priv = 0; + args.gf100.kind = mem->kind; + argc = sizeof(args.gf100); + break; + default: + WARN_ON(1); + return -ENOSYS; + } + + super = vmm->object.client->super; + vmm->object.client->super = true; + ret = nvif_vmm_map(vmm, vma->addr, mem->mem.size, &args, argc, + &mem->mem, 0); + vmm->object.client->super = super; + return ret; +} + +void +nouveau_mem_fini(struct nouveau_mem *mem) +{ + nvif_vmm_put(&mem->cli->drm->client.vmm.vmm, &mem->vma[1]); + nvif_vmm_put(&mem->cli->drm->client.vmm.vmm, &mem->vma[0]); + mutex_lock(&mem->cli->drm->master.lock); + nvif_mem_fini(&mem->mem); + mutex_unlock(&mem->cli->drm->master.lock); +} + +int +nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt) +{ + struct nouveau_mem *mem = nouveau_mem(reg); + struct nouveau_cli *cli = mem->cli; + struct nouveau_drm *drm = cli->drm; + struct nvif_mmu *mmu = &cli->mmu; + struct nvif_mem_ram_v0 args = {}; + bool super = cli->base.super; + u8 type; + int ret; + + if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + type = drm->ttm.type_ncoh; + else + type = drm->ttm.type_host; + + if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) + mem->comp = mem->kind = 0; + if (mem->comp && !(mmu->type[type].type & NVIF_MEM_COMP)) { + if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100) + mem->kind = mmu->kind[mem->kind]; + mem->comp = 0; + } + + if (tt->ttm.sg) args.sgl = tt->ttm.sg->sgl; + else args.dma = tt->dma_address; + + mutex_lock(&drm->master.lock); + cli->base.super = true; + ret = nvif_mem_init_type(mmu, cli->mem->oclass, type, PAGE_SHIFT, + reg->num_pages << PAGE_SHIFT, + &args, sizeof(args), &mem->mem); + cli->base.super = super; + mutex_unlock(&drm->master.lock); + return ret; +} + +int +nouveau_mem_vram(struct ttm_mem_reg *reg, bool contig, u8 page) +{ + struct nouveau_mem *mem = nouveau_mem(reg); + struct nouveau_cli *cli = mem->cli; + struct nouveau_drm *drm = cli->drm; + struct nvif_mmu *mmu = &cli->mmu; + bool super = cli->base.super; + u64 size = ALIGN(reg->num_pages << PAGE_SHIFT, 1 << page); + int ret; + + 
mutex_lock(&drm->master.lock); + cli->base.super = true; + switch (cli->mem->oclass) { + case NVIF_CLASS_MEM_GF100: + ret = nvif_mem_init_type(mmu, cli->mem->oclass, + drm->ttm.type_vram, page, size, + &(struct gf100_mem_v0) { + .contig = contig, + }, sizeof(struct gf100_mem_v0), + &mem->mem); + break; + case NVIF_CLASS_MEM_NV50: + ret = nvif_mem_init_type(mmu, cli->mem->oclass, + drm->ttm.type_vram, page, size, + &(struct nv50_mem_v0) { + .bankswz = mmu->kind[mem->kind] == 2, + .contig = contig, + }, sizeof(struct nv50_mem_v0), + &mem->mem); + break; + default: + ret = -ENOSYS; + WARN_ON(1); + break; + } + cli->base.super = super; + mutex_unlock(&drm->master.lock); + + reg->start = mem->mem.addr >> PAGE_SHIFT; + return ret; +} + +void +nouveau_mem_del(struct ttm_mem_reg *reg) +{ + struct nouveau_mem *mem = nouveau_mem(reg); + nouveau_mem_fini(mem); + kfree(reg->mm_node); + reg->mm_node = NULL; +} + +int +nouveau_mem_new(struct nouveau_cli *cli, u8 kind, u8 comp, + struct ttm_mem_reg *reg) +{ + struct nouveau_mem *mem; + + if (!(mem = kzalloc(sizeof(*mem), GFP_KERNEL))) + return -ENOMEM; + mem->cli = cli; + mem->kind = kind; + mem->comp = comp; + + reg->mm_node = mem; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h b/drivers/gpu/drm/nouveau/nouveau_mem.h new file mode 100644 index 000000000000..f6d039e73812 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_mem.h @@ -0,0 +1,30 @@ +#ifndef __NOUVEAU_MEM_H__ +#define __NOUVEAU_MEM_H__ +#include <drm/ttm/ttm_bo_api.h> +struct ttm_dma_tt; + +#include <nvif/mem.h> +#include <nvif/vmm.h> + +static inline struct nouveau_mem * +nouveau_mem(struct ttm_mem_reg *reg) +{ + return reg->mm_node; +} + +struct nouveau_mem { + struct nouveau_cli *cli; + u8 kind; + u8 comp; + struct nvif_mem mem; + struct nvif_vma vma[2]; +}; + +int nouveau_mem_new(struct nouveau_cli *, u8 kind, u8 comp, + struct ttm_mem_reg *); +void nouveau_mem_del(struct ttm_mem_reg *); +int nouveau_mem_vram(struct ttm_mem_reg *, bool contig, u8 page); +int nouveau_mem_host(struct ttm_mem_reg *, struct ttm_dma_tt *); +void nouveau_mem_fini(struct nouveau_mem *); +int nouveau_mem_map(struct nouveau_mem *, struct nvif_vmm *, struct nvif_vma *); +#endif diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h index 7226f1f60901..b5b5fe40779d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #define NV04_PFB_BOOT_0 0x00100000 # define NV04_PFB_BOOT_0_RAM_AMOUNT 0x00000003 diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c index b7ab268f7d6f..11f6ca89769b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c @@ -1,7 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/pagemap.h> #include <linux/slab.h> #include "nouveau_drv.h" +#include "nouveau_mem.h" #include "nouveau_ttm.h" struct nouveau_sgdma_be { @@ -9,7 +11,7 @@ struct nouveau_sgdma_be { * nouve_bo.c works properly, otherwise have to move them here */ struct ttm_dma_tt ttm; - struct nvkm_mem *node; + struct nouveau_mem *mem; }; static void @@ -27,19 +29,20 @@ static int nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg) { struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm; - struct nvkm_mem *node = reg->mm_node; - - if (ttm->sg) { - node->sg = ttm->sg; - node->pages = NULL; - } else { - node->sg = NULL; - node->pages = nvbe->ttm.dma_address; + struct nouveau_mem *mem 
= nouveau_mem(reg); + int ret; + + ret = nouveau_mem_host(reg, &nvbe->ttm); + if (ret) + return ret; + + ret = nouveau_mem_map(mem, &mem->cli->vmm.vmm, &mem->vma[0]); + if (ret) { + nouveau_mem_fini(mem); + return ret; } - node->size = (reg->num_pages << PAGE_SHIFT) >> 12; - nvkm_vm_map(&node->vma[0], node); - nvbe->node = node; + nvbe->mem = mem; return 0; } @@ -47,7 +50,7 @@ static int nv04_sgdma_unbind(struct ttm_tt *ttm) { struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm; - nvkm_vm_unmap(&nvbe->node->vma[0]); + nouveau_mem_fini(nvbe->mem); return 0; } @@ -61,30 +64,20 @@ static int nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg) { struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm; - struct nvkm_mem *node = reg->mm_node; - - /* noop: bound in move_notify() */ - if (ttm->sg) { - node->sg = ttm->sg; - node->pages = NULL; - } else { - node->sg = NULL; - node->pages = nvbe->ttm.dma_address; - } - node->size = (reg->num_pages << PAGE_SHIFT) >> 12; - return 0; -} + struct nouveau_mem *mem = nouveau_mem(reg); + int ret; -static int -nv50_sgdma_unbind(struct ttm_tt *ttm) -{ - /* noop: unbound in move_notify() */ + ret = nouveau_mem_host(reg, &nvbe->ttm); + if (ret) + return ret; + + nvbe->mem = mem; return 0; } static struct ttm_backend_func nv50_sgdma_backend = { .bind = nv50_sgdma_bind, - .unbind = nv50_sgdma_unbind, + .unbind = nv04_sgdma_unbind, .destroy = nouveau_sgdma_destroy }; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index b0ad7fcefcf5..08b974b30482 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -23,53 +23,37 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - #include "nouveau_drv.h" -#include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "nouveau_mem.h" +#include "nouveau_ttm.h" #include <drm/drm_legacy.h> #include <core/tegra.h> static int -nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) +nouveau_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) { - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_fb *fb = nvxx_fb(&drm->client.device); - man->priv = fb; return 0; } static int -nouveau_vram_manager_fini(struct ttm_mem_type_manager *man) +nouveau_manager_fini(struct ttm_mem_type_manager *man) { - man->priv = NULL; return 0; } -static inline void -nvkm_mem_node_cleanup(struct nvkm_mem *node) +static void +nouveau_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { - if (node->vma[0].node) { - nvkm_vm_unmap(&node->vma[0]); - nvkm_vm_put(&node->vma[0]); - } - - if (node->vma[1].node) { - nvkm_vm_unmap(&node->vma[1]); - nvkm_vm_put(&node->vma[1]); - } + nouveau_mem_del(reg); } static void -nouveau_vram_manager_del(struct ttm_mem_type_manager *man, - struct ttm_mem_reg *reg) +nouveau_manager_debug(struct ttm_mem_type_manager *man, + struct drm_printer *printer) { - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram; - nvkm_mem_node_cleanup(reg->mm_node); - ram->func->put(ram, (struct nvkm_mem **)®->mm_node); } static int @@ -78,192 +62,105 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *reg) { - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram; struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nvkm_mem *node; - u32 size_nc = 0; + struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_mem *mem; int ret; if (drm->client.device.info.ram_size == 0) return -ENOMEM; - if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) - size_nc = 1 << nvbo->page_shift; + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + mem = nouveau_mem(reg); + if (ret) + return ret; - ret = ram->func->get(ram, reg->num_pages << PAGE_SHIFT, - reg->page_alignment << PAGE_SHIFT, size_nc, - (nvbo->tile_flags >> 8) & 0x3ff, &node); + ret = nouveau_mem_vram(reg, nvbo->contig, nvbo->page); if (ret) { - reg->mm_node = NULL; - return (ret == -ENOSPC) ? 
0 : ret; + nouveau_mem_del(reg); + if (ret == -ENOSPC) { + reg->mm_node = NULL; + return 0; + } + return ret; } - node->page_shift = nvbo->page_shift; - - reg->mm_node = node; - reg->start = node->offset >> PAGE_SHIFT; return 0; } const struct ttm_mem_type_manager_func nouveau_vram_manager = { - .init = nouveau_vram_manager_init, - .takedown = nouveau_vram_manager_fini, + .init = nouveau_manager_init, + .takedown = nouveau_manager_fini, .get_node = nouveau_vram_manager_new, - .put_node = nouveau_vram_manager_del, + .put_node = nouveau_manager_del, + .debug = nouveau_manager_debug, }; static int -nouveau_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) -{ - return 0; -} - -static int -nouveau_gart_manager_fini(struct ttm_mem_type_manager *man) -{ - return 0; -} - -static void -nouveau_gart_manager_del(struct ttm_mem_type_manager *man, - struct ttm_mem_reg *reg) -{ - nvkm_mem_node_cleanup(reg->mm_node); - kfree(reg->mm_node); - reg->mm_node = NULL; -} - -static int nouveau_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_mem_reg *reg) { - struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nvkm_mem *node; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; + struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_mem *mem; + int ret; - node->page_shift = 12; - - switch (drm->client.device.info.family) { - case NV_DEVICE_INFO_V0_TNT: - case NV_DEVICE_INFO_V0_CELSIUS: - case NV_DEVICE_INFO_V0_KELVIN: - case NV_DEVICE_INFO_V0_RANKINE: - case NV_DEVICE_INFO_V0_CURIE: - break; - case NV_DEVICE_INFO_V0_TESLA: - if (drm->client.device.info.chipset != 0x50) - node->memtype = (nvbo->tile_flags & 0x7f00) >> 8; - break; - case NV_DEVICE_INFO_V0_FERMI: - case NV_DEVICE_INFO_V0_KEPLER: - case NV_DEVICE_INFO_V0_MAXWELL: - case NV_DEVICE_INFO_V0_PASCAL: - node->memtype = (nvbo->tile_flags & 0xff00) >> 8; - break; - default: - NV_WARN(drm, "%s: unhandled family type %x\n", __func__, - drm->client.device.info.family); - break; - } + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + mem = nouveau_mem(reg); + if (ret) + return ret; - reg->mm_node = node; - reg->start = 0; + reg->start = 0; return 0; } -static void -nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, - struct drm_printer *printer) -{ -} - const struct ttm_mem_type_manager_func nouveau_gart_manager = { - .init = nouveau_gart_manager_init, - .takedown = nouveau_gart_manager_fini, + .init = nouveau_manager_init, + .takedown = nouveau_manager_fini, .get_node = nouveau_gart_manager_new, - .put_node = nouveau_gart_manager_del, - .debug = nouveau_gart_manager_debug + .put_node = nouveau_manager_del, + .debug = nouveau_manager_debug }; -/*XXX*/ -#include <subdev/mmu/nv04.h> -static int -nv04_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) -{ - struct nouveau_drm *drm = nouveau_bdev(man->bdev); - struct nvkm_mmu *mmu = nvxx_mmu(&drm->client.device); - struct nv04_mmu *priv = (void *)mmu; - struct nvkm_vm *vm = NULL; - nvkm_vm_ref(priv->vm, &vm, NULL); - man->priv = vm; - return 0; -} - -static int -nv04_gart_manager_fini(struct ttm_mem_type_manager *man) -{ - struct nvkm_vm *vm = man->priv; - nvkm_vm_ref(NULL, &vm, NULL); - man->priv = NULL; - return 0; -} - -static void -nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) -{ - struct nvkm_mem *node = reg->mm_node; - if (node->vma[0].node) - 
nvkm_vm_put(&node->vma[0]); - kfree(reg->mm_node); - reg->mm_node = NULL; -} - static int nv04_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, const struct ttm_place *place, struct ttm_mem_reg *reg) { - struct nvkm_mem *node; + struct nouveau_bo *nvbo = nouveau_bo(bo); + struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_mem *mem; int ret; - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - - node->page_shift = 12; + ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); + mem = nouveau_mem(reg); + if (ret) + return ret; - ret = nvkm_vm_get(man->priv, reg->num_pages << 12, node->page_shift, - NV_MEM_ACCESS_RW, &node->vma[0]); + ret = nvif_vmm_get(&mem->cli->vmm.vmm, PTES, false, 12, 0, + reg->num_pages << PAGE_SHIFT, &mem->vma[0]); if (ret) { - kfree(node); + nouveau_mem_del(reg); + if (ret == -ENOSPC) { + reg->mm_node = NULL; + return 0; + } return ret; } - reg->mm_node = node; - reg->start = node->vma[0].offset >> PAGE_SHIFT; + reg->start = mem->vma[0].addr >> PAGE_SHIFT; return 0; } -static void -nv04_gart_manager_debug(struct ttm_mem_type_manager *man, - struct drm_printer *printer) -{ -} - const struct ttm_mem_type_manager_func nv04_gart_manager = { - .init = nv04_gart_manager_init, - .takedown = nv04_gart_manager_fini, + .init = nouveau_manager_init, + .takedown = nouveau_manager_fini, .get_node = nv04_gart_manager_new, - .put_node = nv04_gart_manager_del, - .debug = nv04_gart_manager_debug + .put_node = nouveau_manager_del, + .debug = nouveau_manager_debug }; int @@ -343,44 +240,43 @@ nouveau_ttm_init(struct nouveau_drm *drm) { struct nvkm_device *device = nvxx_device(&drm->client.device); struct nvkm_pci *pci = device->pci; + struct nvif_mmu *mmu = &drm->client.mmu; struct drm_device *dev = drm->dev; - u8 bits; - int ret; + int typei, ret; - if (pci && pci->agp.bridge) { - drm->agp.bridge = pci->agp.bridge; - drm->agp.base = pci->agp.base; - drm->agp.size = pci->agp.size; - drm->agp.cma = pci->agp.cma; - } + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | + NVIF_MEM_COHERENT); + if (typei < 0) + return -ENOSYS; - bits = nvxx_mmu(&drm->client.device)->dma_bits; - if (nvxx_device(&drm->client.device)->func->pci) { - if (drm->agp.bridge) - bits = 32; - } else if (device->func->tegra) { - struct nvkm_device_tegra *tegra = device->func->tegra(device); + drm->ttm.type_host = typei; - /* - * If the platform can use a IOMMU, then the addressable DMA - * space is constrained by the IOMMU bit - */ - if (tegra->func->iommu_bit) - bits = min(bits, tegra->func->iommu_bit); + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); + if (typei < 0) + return -ENOSYS; - } + drm->ttm.type_ncoh = typei; - ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits)); - if (ret && bits != 32) { - bits = 32; - ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits)); + if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && + drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { + typei = nvif_mmu_type(mmu, NVIF_MEM_VRAM | NVIF_MEM_MAPPABLE | + NVIF_MEM_KIND | + NVIF_MEM_COMP | + NVIF_MEM_DISP); + if (typei < 0) + return -ENOSYS; + + drm->ttm.type_vram = typei; + } else { + drm->ttm.type_vram = -1; } - if (ret) - return ret; - ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits)); - if (ret) - dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32)); + if (pci && pci->agp.bridge) { + drm->agp.bridge = pci->agp.bridge; + drm->agp.base = pci->agp.base; + drm->agp.size = pci->agp.size; + drm->agp.cma = 
pci->agp.cma; + } ret = nouveau_ttm_global_init(drm); if (ret) @@ -391,7 +287,7 @@ nouveau_ttm_init(struct nouveau_drm *drm) &nouveau_bo_driver, dev->anon_inode->i_mapping, DRM_FILE_PAGE_OFFSET, - bits <= 32 ? true : false); + drm->client.mmu.dmabits <= 32 ? true : false); if (ret) { NV_ERROR(drm, "error initialising bo driver, %d\n", ret); return ret; @@ -415,7 +311,7 @@ nouveau_ttm_init(struct nouveau_drm *drm) /* GART init */ if (!drm->agp.bridge) { - drm->gem.gart_available = nvxx_mmu(&drm->client.device)->limit; + drm->gem.gart_available = drm->client.vmm.vmm.limit; } else { drm->gem.gart_available = drm->agp.size; } diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.h b/drivers/gpu/drm/nouveau/nouveau_ttm.h index 25b0de413352..96082b696420 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.h +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_TTM_H__ #define __NOUVEAU_TTM_H__ diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.h b/drivers/gpu/drm/nouveau/nouveau_usif.h index c037e3ae8c70..c68f1c65af3b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_usif.h +++ b/drivers/gpu/drm/nouveau/nouveau_usif.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_USIF_H__ #define __NOUVEAU_USIF_H__ diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index 48393a4f6331..52e52a360fb1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.h b/drivers/gpu/drm/nouveau/nouveau_vga.h index ea3ad6974c65..6a3000c88142 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.h +++ b/drivers/gpu/drm/nouveau/nouveau_vga.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NOUVEAU_VGA_H__ #define __NOUVEAU_VGA_H__ diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c new file mode 100644 index 000000000000..9e2628dd8e4d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -0,0 +1,135 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "nouveau_vmm.h" +#include "nouveau_drv.h" +#include "nouveau_bo.h" +#include "nouveau_mem.h" + +void +nouveau_vma_unmap(struct nouveau_vma *vma) +{ + if (vma->mem) { + nvif_vmm_unmap(&vma->vmm->vmm, vma->addr); + vma->mem = NULL; + } +} + +int +nouveau_vma_map(struct nouveau_vma *vma, struct nouveau_mem *mem) +{ + struct nvif_vma tmp = { .addr = vma->addr }; + int ret = nouveau_mem_map(mem, &vma->vmm->vmm, &tmp); + if (ret) + return ret; + vma->mem = mem; + return 0; +} + +struct nouveau_vma * +nouveau_vma_find(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm) +{ + struct nouveau_vma *vma; + + list_for_each_entry(vma, &nvbo->vma_list, head) { + if (vma->vmm == vmm) + return vma; + } + + return NULL; +} + +void +nouveau_vma_del(struct nouveau_vma **pvma) +{ + struct nouveau_vma *vma = *pvma; + if (vma && --vma->refs <= 0) { + if (likely(vma->addr != ~0ULL)) { + struct nvif_vma tmp = { .addr = vma->addr, .size = 1 }; + nvif_vmm_put(&vma->vmm->vmm, &tmp); + } + list_del(&vma->head); + *pvma = NULL; + kfree(*pvma); + } +} + +int +nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, + struct nouveau_vma **pvma) +{ + struct nouveau_mem *mem = nouveau_mem(&nvbo->bo.mem); + struct nouveau_vma *vma; + struct nvif_vma tmp; + int ret; + + if ((vma = *pvma = nouveau_vma_find(nvbo, vmm))) { + vma->refs++; + return 0; + } + + if (!(vma = *pvma = kmalloc(sizeof(*vma), GFP_KERNEL))) + return -ENOMEM; + vma->vmm = vmm; + vma->refs = 1; + vma->addr = ~0ULL; + vma->mem = NULL; + list_add_tail(&vma->head, &nvbo->vma_list); + + if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM && + mem->mem.page == nvbo->page) { + ret = nvif_vmm_get(&vmm->vmm, LAZY, false, mem->mem.page, 0, + mem->mem.size, &tmp); + if (ret) + goto done; + + vma->addr = tmp.addr; + ret = nouveau_vma_map(vma, mem); + } else { + ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, + mem->mem.size, &tmp); + vma->addr = tmp.addr; + } + +done: + if (ret) + nouveau_vma_del(pvma); + return ret; +} + +void +nouveau_vmm_fini(struct nouveau_vmm *vmm) +{ + nvif_vmm_fini(&vmm->vmm); + vmm->cli = NULL; +} + +int +nouveau_vmm_init(struct nouveau_cli *cli, s32 oclass, struct nouveau_vmm *vmm) +{ + int ret = nvif_vmm_init(&cli->mmu, oclass, PAGE_SIZE, 0, NULL, 0, + &vmm->vmm); + if (ret) + return ret; + + vmm->cli = cli; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h new file mode 100644 index 000000000000..5c31f43678d3 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h @@ -0,0 +1,31 @@ +#ifndef __NOUVEAU_VMA_H__ +#define __NOUVEAU_VMA_H__ +#include <nvif/vmm.h> +struct nouveau_bo; +struct nouveau_mem; + +struct nouveau_vma { + struct nouveau_vmm *vmm; + int refs; + struct list_head head; + u64 addr; + + struct nouveau_mem *mem; +}; + +struct nouveau_vma *nouveau_vma_find(struct nouveau_bo *, struct nouveau_vmm *); +int nouveau_vma_new(struct nouveau_bo *, struct nouveau_vmm *, + struct nouveau_vma **); +void nouveau_vma_del(struct nouveau_vma **); +int nouveau_vma_map(struct nouveau_vma *, struct nouveau_mem *); +void nouveau_vma_unmap(struct nouveau_vma *); + +struct nouveau_vmm { + struct nouveau_cli *cli; + struct nvif_vmm vmm; + struct nvkm_vm *vm; +}; + +int nouveau_vmm_init(struct nouveau_cli *, s32 oclass, struct nouveau_vmm *); +void nouveau_vmm_fini(struct nouveau_vmm *); +#endif diff --git a/drivers/gpu/drm/nouveau/nv10_fence.h b/drivers/gpu/drm/nouveau/nv10_fence.h index b7a508585304..7616c66803f8 100644 --- a/drivers/gpu/drm/nouveau/nv10_fence.h +++ 
b/drivers/gpu/drm/nouveau/nv10_fence.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV10_FENCE_H_ #define __NV10_FENCE_H_ diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 2dbf62a2ac41..584466ef688f 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -318,7 +318,7 @@ nv50_chan_create(struct nvif_device *device, struct nvif_object *disp, ret = nvif_object_init(disp, 0, oclass[0], data, size, &chan->user); if (ret == 0) - nvif_object_map(&chan->user); + nvif_object_map(&chan->user, NULL, 0); nvif_object_sclass_put(&sclass); return ret; } @@ -424,7 +424,7 @@ nv50_dmac_ctxdma_new(struct nv50_dmac *dmac, struct nouveau_framebuffer *fb) { struct nouveau_drm *drm = nouveau_drm(fb->base.dev); struct nv50_dmac_ctxdma *ctxdma; - const u8 kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8; + const u8 kind = fb->nvbo->kind; const u32 handle = 0xfb000000 | kind; struct { struct nv_dma_v0 base; @@ -510,6 +510,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, int ret; mutex_init(&dmac->lock); + INIT_LIST_HEAD(&dmac->ctxdma); dmac->ptr = dma_alloc_coherent(nvxx_device(device)->dev, PAGE_SIZE, &dmac->handle, GFP_KERNEL); @@ -556,7 +557,6 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, if (ret) return ret; - INIT_LIST_HEAD(&dmac->ctxdma); return ret; } @@ -847,7 +847,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, asyw->image.w = fb->base.width; asyw->image.h = fb->base.height; - asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8; + asyw->image.kind = fb->nvbo->kind; if (asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) asyw->interval = 0; @@ -857,9 +857,9 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, if (asyw->image.kind) { asyw->image.layout = 0; if (drm->client.device.info.chipset >= 0xc0) - asyw->image.block = fb->nvbo->tile_mode >> 4; + asyw->image.block = fb->nvbo->mode >> 4; else - asyw->image.block = fb->nvbo->tile_mode; + asyw->image.block = fb->nvbo->mode; asyw->image.pitch = (fb->base.pitches[0] / 4) << 4; } else { asyw->image.layout = 1; @@ -3265,11 +3265,14 @@ nv50_mstm = { void nv50_mstm_service(struct nv50_mstm *mstm) { - struct drm_dp_aux *aux = mstm->mgr.aux; + struct drm_dp_aux *aux = mstm ? 
mstm->mgr.aux : NULL; bool handled = true; int ret; u8 esi[8] = {}; + if (!aux) + return; + while (handled) { ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8); if (ret != 8) { @@ -4096,7 +4099,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, { struct nouveau_drm *drm = nouveau_drm(dev); struct nv50_disp *disp = nv50_disp(dev); - struct drm_plane_state *old_plane_state; + struct drm_plane_state *new_plane_state; struct drm_plane *plane; struct drm_crtc *crtc; bool active = false; @@ -4126,8 +4129,8 @@ nv50_disp_atomic_commit(struct drm_device *dev, if (ret) goto err_cleanup; - for_each_old_plane_in_state(state, plane, old_plane_state, i) { - struct nv50_wndw_atom *asyw = nv50_wndw_atom(old_plane_state); + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); struct nv50_wndw *wndw = nv50_wndw(plane); if (asyw->set.image) { diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index 327dcd7901ed..facd18564e0d 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -25,6 +25,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" #include "nouveau_fbcon.h" +#include "nouveau_vmm.h" int nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) @@ -239,8 +240,8 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->fix.line_length); OUT_RING(chan, info->var.xres_virtual); OUT_RING(chan, info->var.yres_virtual); - OUT_RING(chan, upper_32_bits(fb->vma.offset)); - OUT_RING(chan, lower_32_bits(fb->vma.offset)); + OUT_RING(chan, upper_32_bits(fb->vma->addr)); + OUT_RING(chan, lower_32_bits(fb->vma->addr)); BEGIN_NV04(chan, NvSub2D, 0x0230, 2); OUT_RING(chan, format); OUT_RING(chan, 1); @@ -248,8 +249,8 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->fix.line_length); OUT_RING(chan, info->var.xres_virtual); OUT_RING(chan, info->var.yres_virtual); - OUT_RING(chan, upper_32_bits(fb->vma.offset)); - OUT_RING(chan, lower_32_bits(fb->vma.offset)); + OUT_RING(chan, upper_32_bits(fb->vma->addr)); + OUT_RING(chan, lower_32_bits(fb->vma->addr)); FIRE_RING(chan); return 0; diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index bd7a8a1e4ad9..5f0c0c27d5dc 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -25,6 +25,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" #include "nouveau_fence.h" +#include "nouveau_vmm.h" #include "nv50_display.h" @@ -68,12 +69,7 @@ nv84_fence_emit(struct nouveau_fence *fence) { struct nouveau_channel *chan = fence->channel; struct nv84_fence_chan *fctx = chan->fence; - u64 addr = chan->chid * 16; - - if (fence->sysmem) - addr += fctx->vma_gart.offset; - else - addr += fctx->vma.offset; + u64 addr = fctx->vma->addr + chan->chid * 16; return fctx->base.emit32(chan, addr, fence->base.seqno); } @@ -83,12 +79,7 @@ nv84_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *prev, struct nouveau_channel *chan) { struct nv84_fence_chan *fctx = chan->fence; - u64 addr = prev->chid * 16; - - if (fence->sysmem) - addr += fctx->vma_gart.offset; - else - addr += fctx->vma.offset; + u64 addr = fctx->vma->addr + prev->chid * 16; return fctx->base.sync32(chan, addr, fence->base.seqno); } @@ -108,8 +99,7 @@ nv84_fence_context_del(struct nouveau_channel *chan) nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence); mutex_lock(&priv->mutex); - nouveau_bo_vma_del(priv->bo, 
&fctx->vma_gart); - nouveau_bo_vma_del(priv->bo, &fctx->vma); + nouveau_vma_del(&fctx->vma); mutex_unlock(&priv->mutex); nouveau_fence_context_del(&fctx->base); chan->fence = NULL; @@ -137,11 +127,7 @@ nv84_fence_context_new(struct nouveau_channel *chan) fctx->base.sequence = nv84_fence_read(chan); mutex_lock(&priv->mutex); - ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma); - if (ret == 0) { - ret = nouveau_bo_vma_add(priv->bo_gart, cli->vm, - &fctx->vma_gart); - } + ret = nouveau_vma_new(priv->bo, &cli->vmm, &fctx->vma); mutex_unlock(&priv->mutex); if (ret) @@ -182,10 +168,6 @@ static void nv84_fence_destroy(struct nouveau_drm *drm) { struct nv84_fence_priv *priv = drm->fence; - nouveau_bo_unmap(priv->bo_gart); - if (priv->bo_gart) - nouveau_bo_unpin(priv->bo_gart); - nouveau_bo_ref(NULL, &priv->bo_gart); nouveau_bo_unmap(priv->bo); if (priv->bo) nouveau_bo_unpin(priv->bo); @@ -238,21 +220,6 @@ nv84_fence_create(struct nouveau_drm *drm) nouveau_bo_ref(NULL, &priv->bo); } - if (ret == 0) - ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0, - TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED, 0, - 0, NULL, NULL, &priv->bo_gart); - if (ret == 0) { - ret = nouveau_bo_pin(priv->bo_gart, TTM_PL_FLAG_TT, false); - if (ret == 0) { - ret = nouveau_bo_map(priv->bo_gart); - if (ret) - nouveau_bo_unpin(priv->bo_gart); - } - if (ret) - nouveau_bo_ref(NULL, &priv->bo_gart); - } - if (ret) nv84_fence_destroy(drm); return ret; diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c index 90f27bfa381f..c0deef4fe727 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c +++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c @@ -25,6 +25,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" #include "nouveau_fbcon.h" +#include "nouveau_vmm.h" int nvc0_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) @@ -239,8 +240,8 @@ nvc0_fbcon_accel_init(struct fb_info *info) OUT_RING (chan, info->fix.line_length); OUT_RING (chan, info->var.xres_virtual); OUT_RING (chan, info->var.yres_virtual); - OUT_RING (chan, upper_32_bits(fb->vma.offset)); - OUT_RING (chan, lower_32_bits(fb->vma.offset)); + OUT_RING (chan, upper_32_bits(fb->vma->addr)); + OUT_RING (chan, lower_32_bits(fb->vma->addr)); BEGIN_NVC0(chan, NvSub2D, 0x0230, 10); OUT_RING (chan, format); OUT_RING (chan, 1); @@ -250,8 +251,8 @@ nvc0_fbcon_accel_init(struct fb_info *info) OUT_RING (chan, info->fix.line_length); OUT_RING (chan, info->var.xres_virtual); OUT_RING (chan, info->var.yres_virtual); - OUT_RING (chan, upper_32_bits(fb->vma.offset)); - OUT_RING (chan, lower_32_bits(fb->vma.offset)); + OUT_RING (chan, upper_32_bits(fb->vma->addr)); + OUT_RING (chan, lower_32_bits(fb->vma->addr)); FIRE_RING (chan); return 0; diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild index 067b5e9f5ec1..f1675a4ab6fa 100644 --- a/drivers/gpu/drm/nouveau/nvif/Kbuild +++ b/drivers/gpu/drm/nouveau/nvif/Kbuild @@ -2,4 +2,7 @@ nvif-y := nvif/object.o nvif-y += nvif/client.o nvif-y += nvif/device.o nvif-y += nvif/driver.o +nvif-y += nvif/mem.o +nvif-y += nvif/mmu.o nvif-y += nvif/notify.o +nvif-y += nvif/vmm.o diff --git a/drivers/gpu/drm/nouveau/nvif/mem.c b/drivers/gpu/drm/nouveau/nvif/mem.c new file mode 100644 index 000000000000..0f9382c60145 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/mem.c @@ -0,0 +1,88 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <nvif/mem.h> +#include <nvif/client.h> + +#include <nvif/if000a.h> + +void +nvif_mem_fini(struct nvif_mem *mem) +{ + nvif_object_fini(&mem->object); +} + +int +nvif_mem_init_type(struct nvif_mmu *mmu, s32 oclass, int type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *mem) +{ + struct nvif_mem_v0 *args; + u8 stack[128]; + int ret; + + mem->object.client = NULL; + if (type < 0) + return -EINVAL; + + if (sizeof(*args) + argc > sizeof(stack)) { + if (!(args = kmalloc(sizeof(*args) + argc, GFP_KERNEL))) + return -ENOMEM; + } else { + args = (void *)stack; + } + args->version = 0; + args->type = type; + args->page = page; + args->size = size; + memcpy(args->data, argv, argc); + + ret = nvif_object_init(&mmu->object, 0, oclass, args, + sizeof(*args) + argc, &mem->object); + if (ret == 0) { + mem->type = mmu->type[type].type; + mem->page = args->page; + mem->addr = args->addr; + mem->size = args->size; + } + + if (args != (void *)stack) + kfree(args); + return ret; + +} + +int +nvif_mem_init(struct nvif_mmu *mmu, s32 oclass, u8 type, u8 page, + u64 size, void *argv, u32 argc, struct nvif_mem *mem) +{ + int ret = -EINVAL, i; + + mem->object.client = NULL; + + for (i = 0; ret && i < mmu->type_nr; i++) { + if ((mmu->type[i].type & type) == type) { + ret = nvif_mem_init_type(mmu, oclass, i, page, size, + argv, argc, mem); + } + } + + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvif/mmu.c b/drivers/gpu/drm/nouveau/nvif/mmu.c new file mode 100644 index 000000000000..15d0dcbf7ab4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/mmu.c @@ -0,0 +1,117 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <nvif/mmu.h> + +#include <nvif/class.h> +#include <nvif/if0008.h> + +void +nvif_mmu_fini(struct nvif_mmu *mmu) +{ + kfree(mmu->kind); + kfree(mmu->type); + kfree(mmu->heap); + nvif_object_fini(&mmu->object); +} + +int +nvif_mmu_init(struct nvif_object *parent, s32 oclass, struct nvif_mmu *mmu) +{ + struct nvif_mmu_v0 args; + int ret, i; + + args.version = 0; + mmu->heap = NULL; + mmu->type = NULL; + mmu->kind = NULL; + + ret = nvif_object_init(parent, 0, oclass, &args, sizeof(args), + &mmu->object); + if (ret) + goto done; + + mmu->dmabits = args.dmabits; + mmu->heap_nr = args.heap_nr; + mmu->type_nr = args.type_nr; + mmu->kind_nr = args.kind_nr; + + mmu->heap = kmalloc(sizeof(*mmu->heap) * mmu->heap_nr, GFP_KERNEL); + mmu->type = kmalloc(sizeof(*mmu->type) * mmu->type_nr, GFP_KERNEL); + if (ret = -ENOMEM, !mmu->heap || !mmu->type) + goto done; + + mmu->kind = kmalloc(sizeof(*mmu->kind) * mmu->kind_nr, GFP_KERNEL); + if (!mmu->kind && mmu->kind_nr) + goto done; + + for (i = 0; i < mmu->heap_nr; i++) { + struct nvif_mmu_heap_v0 args = { .index = i }; + + ret = nvif_object_mthd(&mmu->object, NVIF_MMU_V0_HEAP, + &args, sizeof(args)); + if (ret) + goto done; + + mmu->heap[i].size = args.size; + } + + for (i = 0; i < mmu->type_nr; i++) { + struct nvif_mmu_type_v0 args = { .index = i }; + + ret = nvif_object_mthd(&mmu->object, NVIF_MMU_V0_TYPE, + &args, sizeof(args)); + if (ret) + goto done; + + mmu->type[i].type = 0; + if (args.vram) mmu->type[i].type |= NVIF_MEM_VRAM; + if (args.host) mmu->type[i].type |= NVIF_MEM_HOST; + if (args.comp) mmu->type[i].type |= NVIF_MEM_COMP; + if (args.disp) mmu->type[i].type |= NVIF_MEM_DISP; + if (args.kind ) mmu->type[i].type |= NVIF_MEM_KIND; + if (args.mappable) mmu->type[i].type |= NVIF_MEM_MAPPABLE; + if (args.coherent) mmu->type[i].type |= NVIF_MEM_COHERENT; + if (args.uncached) mmu->type[i].type |= NVIF_MEM_UNCACHED; + mmu->type[i].heap = args.heap; + } + + if (mmu->kind_nr) { + struct nvif_mmu_kind_v0 *kind; + u32 argc = sizeof(*kind) + sizeof(*kind->data) * mmu->kind_nr; + + if (ret = -ENOMEM, !(kind = kmalloc(argc, GFP_KERNEL))) + goto done; + kind->version = 0; + kind->count = mmu->kind_nr; + + ret = nvif_object_mthd(&mmu->object, NVIF_MMU_V0_KIND, + kind, argc); + if (ret == 0) + memcpy(mmu->kind, kind->data, kind->count); + kfree(kind); + } + +done: + if (ret) + nvif_mmu_fini(mmu); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvif/object.c b/drivers/gpu/drm/nouveau/nvif/object.c index c3fb6a20f567..40adfe9b334b 100644 --- a/drivers/gpu/drm/nouveau/nvif/object.c +++ b/drivers/gpu/drm/nouveau/nvif/object.c @@ -166,46 +166,77 @@ nvif_object_mthd(struct nvif_object *object, u32 mthd, void *data, u32 size) } void -nvif_object_unmap(struct nvif_object *object) +nvif_object_unmap_handle(struct nvif_object *object) +{ + struct { + struct nvif_ioctl_v0 ioctl; + struct nvif_ioctl_unmap unmap; + } args = { + .ioctl.type = NVIF_IOCTL_V0_UNMAP, + }; + + nvif_object_ioctl(object, &args, sizeof(args), NULL); +} + +int +nvif_object_map_handle(struct nvif_object *object, void *argv, u32 argc, + u64 *handle, u64 *length) { - if (object->map.size) { - struct nvif_client *client = object->client; - struct { - struct nvif_ioctl_v0 ioctl; - struct nvif_ioctl_unmap unmap; - 
} args = { - .ioctl.type = NVIF_IOCTL_V0_UNMAP, - }; + struct { + struct nvif_ioctl_v0 ioctl; + struct nvif_ioctl_map_v0 map; + } *args; + u32 argn = sizeof(*args) + argc; + int ret, maptype; + + if (!(args = kzalloc(argn, GFP_KERNEL))) + return -ENOMEM; + args->ioctl.type = NVIF_IOCTL_V0_MAP; + memcpy(args->map.data, argv, argc); - if (object->map.ptr) { + ret = nvif_object_ioctl(object, args, argn, NULL); + *handle = args->map.handle; + *length = args->map.length; + maptype = args->map.type; + kfree(args); + return ret ? ret : (maptype == NVIF_IOCTL_MAP_V0_IO); +} + +void +nvif_object_unmap(struct nvif_object *object) +{ + struct nvif_client *client = object->client; + if (object->map.ptr) { + if (object->map.size) { client->driver->unmap(client, object->map.ptr, object->map.size); - object->map.ptr = NULL; + object->map.size = 0; } - - nvif_object_ioctl(object, &args, sizeof(args), NULL); - object->map.size = 0; + object->map.ptr = NULL; + nvif_object_unmap_handle(object); } } int -nvif_object_map(struct nvif_object *object) +nvif_object_map(struct nvif_object *object, void *argv, u32 argc) { struct nvif_client *client = object->client; - struct { - struct nvif_ioctl_v0 ioctl; - struct nvif_ioctl_map_v0 map; - } args = { - .ioctl.type = NVIF_IOCTL_V0_MAP, - }; - int ret = nvif_object_ioctl(object, &args, sizeof(args), NULL); - if (ret == 0) { - object->map.size = args.map.length; - object->map.ptr = client->driver->map(client, args.map.handle, - object->map.size); - if (ret = -ENOMEM, object->map.ptr) + u64 handle, length; + int ret = nvif_object_map_handle(object, argv, argc, &handle, &length); + if (ret >= 0) { + if (ret) { + object->map.ptr = client->driver->map(client, + handle, + length); + if (ret = -ENOMEM, object->map.ptr) { + object->map.size = length; + return 0; + } + } else { + object->map.ptr = (void *)(unsigned long)handle; return 0; - nvif_object_unmap(object); + } + nvif_object_unmap_handle(object); } return ret; } diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c new file mode 100644 index 000000000000..31cdb2d2e1ff --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -0,0 +1,167 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include <nvif/vmm.h> +#include <nvif/mem.h> + +#include <nvif/if000c.h> + +int +nvif_vmm_unmap(struct nvif_vmm *vmm, u64 addr) +{ + return nvif_object_mthd(&vmm->object, NVIF_VMM_V0_UNMAP, + &(struct nvif_vmm_unmap_v0) { .addr = addr }, + sizeof(struct nvif_vmm_unmap_v0)); +} + +int +nvif_vmm_map(struct nvif_vmm *vmm, u64 addr, u64 size, void *argv, u32 argc, + struct nvif_mem *mem, u64 offset) +{ + struct nvif_vmm_map_v0 *args; + u8 stack[16]; + int ret; + + if (sizeof(*args) + argc > sizeof(stack)) { + if (!(args = kmalloc(sizeof(*args) + argc, GFP_KERNEL))) + return -ENOMEM; + } else { + args = (void *)stack; + } + + args->version = 0; + args->addr = addr; + args->size = size; + args->memory = nvif_handle(&mem->object); + args->offset = offset; + memcpy(args->data, argv, argc); + + ret = nvif_object_mthd(&vmm->object, NVIF_VMM_V0_MAP, + args, sizeof(*args) + argc); + if (args != (void *)stack) + kfree(args); + return ret; +} + +void +nvif_vmm_put(struct nvif_vmm *vmm, struct nvif_vma *vma) +{ + if (vma->size) { + WARN_ON(nvif_object_mthd(&vmm->object, NVIF_VMM_V0_PUT, + &(struct nvif_vmm_put_v0) { + .addr = vma->addr, + }, sizeof(struct nvif_vmm_put_v0))); + vma->size = 0; + } +} + +int +nvif_vmm_get(struct nvif_vmm *vmm, enum nvif_vmm_get type, bool sparse, + u8 page, u8 align, u64 size, struct nvif_vma *vma) +{ + struct nvif_vmm_get_v0 args; + int ret; + + args.version = vma->size = 0; + args.sparse = sparse; + args.page = page; + args.align = align; + args.size = size; + + switch (type) { + case ADDR: args.type = NVIF_VMM_GET_V0_ADDR; break; + case PTES: args.type = NVIF_VMM_GET_V0_PTES; break; + case LAZY: args.type = NVIF_VMM_GET_V0_LAZY; break; + default: + WARN_ON(1); + return -EINVAL; + } + + ret = nvif_object_mthd(&vmm->object, NVIF_VMM_V0_GET, + &args, sizeof(args)); + if (ret == 0) { + vma->addr = args.addr; + vma->size = args.size; + } + return ret; +} + +void +nvif_vmm_fini(struct nvif_vmm *vmm) +{ + kfree(vmm->page); + nvif_object_fini(&vmm->object); +} + +int +nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, u64 addr, u64 size, + void *argv, u32 argc, struct nvif_vmm *vmm) +{ + struct nvif_vmm_v0 *args; + u32 argn = sizeof(*args) + argc; + int ret = -ENOSYS, i; + + vmm->object.client = NULL; + vmm->page = NULL; + + if (!(args = kmalloc(argn, GFP_KERNEL))) + return -ENOMEM; + args->version = 0; + args->addr = addr; + args->size = size; + memcpy(args->data, argv, argc); + + ret = nvif_object_init(&mmu->object, 0, oclass, args, argn, + &vmm->object); + if (ret) + goto done; + + vmm->start = args->addr; + vmm->limit = args->size; + + vmm->page_nr = args->page_nr; + vmm->page = kmalloc(sizeof(*vmm->page) * vmm->page_nr, GFP_KERNEL); + if (!vmm->page) { + ret = -ENOMEM; + goto done; + } + + for (i = 0; i < vmm->page_nr; i++) { + struct nvif_vmm_page_v0 args = { .index = i }; + + ret = nvif_object_mthd(&vmm->object, NVIF_VMM_V0_PAGE, + &args, sizeof(args)); + if (ret) + break; + + vmm->page[i].shift = args.shift; + vmm->page[i].sparse = args.sparse; + vmm->page[i].vram = args.vram; + vmm->page[i].host = args.host; + vmm->page[i].comp = args.comp; + } + +done: + if (ret) + nvif_vmm_fini(vmm); + kfree(args); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c index 0d3a896892b4..ac671202919e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/client.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c @@ -301,5 +301,7 @@ nvkm_client_new(const char *name, u64 device, const char *cfg, client->debug = nvkm_dbgopt(dbg, 
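The new nvif/vmm.c above completes the client-side VMM interface: nvif_vmm_init() creates the address space, nvif_vmm_get()/nvif_vmm_put() manage VMAs, and nvif_vmm_map()/nvif_vmm_unmap() handle the PTEs. A hedged round-trip sketch; the mmu/oclass/mem arguments, the page shift of 12 and the zero addr/size passed to init are placeholders, not values mandated by this patch:

#include <nvif/vmm.h>
#include <nvif/mem.h>

/* Hypothetical round trip: create a VMM, grab some address space with
 * page tables, map a pre-existing nvif_mem into it, then unwind. */
static int
example_vmm_round_trip(struct nvif_mmu *mmu, s32 oclass,
		       struct nvif_mem *mem, u64 size)
{
	struct nvif_vmm vmm;
	struct nvif_vma vma;
	int ret;

	ret = nvif_vmm_init(mmu, oclass, 0, 0, NULL, 0, &vmm);
	if (ret)
		return ret;

	ret = nvif_vmm_get(&vmm, PTES, false, 12, 0, size, &vma);
	if (ret == 0) {
		ret = nvif_vmm_map(&vmm, vma.addr, size, NULL, 0, mem, 0);
		if (ret == 0)
			nvif_vmm_unmap(&vmm, vma.addr);
		nvif_vmm_put(&vmm, &vma);
	}

	nvif_vmm_fini(&vmm);
	return ret;
}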
"CLIENT"); client->objroot = RB_ROOT; client->ntfy = ntfy; + INIT_LIST_HEAD(&client->umem); + spin_lock_init(&client->lock); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index b6c916954a10..657231c3c098 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -126,6 +126,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) return ret; } +static int +nvkm_engine_preinit(struct nvkm_subdev *subdev) +{ + struct nvkm_engine *engine = nvkm_engine(subdev); + if (engine->func->preinit) + engine->func->preinit(engine); + return 0; +} + static void * nvkm_engine_dtor(struct nvkm_subdev *subdev) { @@ -138,6 +147,7 @@ nvkm_engine_dtor(struct nvkm_subdev *subdev) static const struct nvkm_subdev_func nvkm_engine_func = { .dtor = nvkm_engine_dtor, + .preinit = nvkm_engine_preinit, .init = nvkm_engine_init, .fini = nvkm_engine_fini, .intr = nvkm_engine_intr, diff --git a/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c b/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c index a7bd22706b2a..d6de2b3ed2c3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c @@ -42,6 +42,14 @@ nvkm_gpuobj_wr32_fast(struct nvkm_gpuobj *gpuobj, u32 offset, u32 data) } /* accessor functions for gpuobjs allocated directly from instmem */ +static int +nvkm_gpuobj_heap_map(struct nvkm_gpuobj *gpuobj, u64 offset, + struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc) +{ + return nvkm_memory_map(gpuobj->memory, offset, vmm, vma, argv, argc); +} + static u32 nvkm_gpuobj_heap_rd32(struct nvkm_gpuobj *gpuobj, u32 offset) { @@ -67,6 +75,7 @@ nvkm_gpuobj_heap_fast = { .release = nvkm_gpuobj_heap_release, .rd32 = nvkm_gpuobj_rd32_fast, .wr32 = nvkm_gpuobj_wr32_fast, + .map = nvkm_gpuobj_heap_map, }; static const struct nvkm_gpuobj_func @@ -74,6 +83,7 @@ nvkm_gpuobj_heap_slow = { .release = nvkm_gpuobj_heap_release, .rd32 = nvkm_gpuobj_heap_rd32, .wr32 = nvkm_gpuobj_heap_wr32, + .map = nvkm_gpuobj_heap_map, }; static void * @@ -90,9 +100,19 @@ nvkm_gpuobj_heap_acquire(struct nvkm_gpuobj *gpuobj) static const struct nvkm_gpuobj_func nvkm_gpuobj_heap = { .acquire = nvkm_gpuobj_heap_acquire, + .map = nvkm_gpuobj_heap_map, }; /* accessor functions for gpuobjs sub-allocated from a parent gpuobj */ +static int +nvkm_gpuobj_map(struct nvkm_gpuobj *gpuobj, u64 offset, + struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc) +{ + return nvkm_memory_map(gpuobj->parent, gpuobj->node->offset + offset, + vmm, vma, argv, argc); +} + static u32 nvkm_gpuobj_rd32(struct nvkm_gpuobj *gpuobj, u32 offset) { @@ -118,6 +138,7 @@ nvkm_gpuobj_fast = { .release = nvkm_gpuobj_release, .rd32 = nvkm_gpuobj_rd32_fast, .wr32 = nvkm_gpuobj_wr32_fast, + .map = nvkm_gpuobj_map, }; static const struct nvkm_gpuobj_func @@ -125,6 +146,7 @@ nvkm_gpuobj_slow = { .release = nvkm_gpuobj_release, .rd32 = nvkm_gpuobj_rd32, .wr32 = nvkm_gpuobj_wr32, + .map = nvkm_gpuobj_map, }; static void * @@ -143,6 +165,7 @@ nvkm_gpuobj_acquire(struct nvkm_gpuobj *gpuobj) static const struct nvkm_gpuobj_func nvkm_gpuobj_func = { .acquire = nvkm_gpuobj_acquire, + .map = nvkm_gpuobj_map, }; static int @@ -185,7 +208,7 @@ nvkm_gpuobj_ctor(struct nvkm_device *device, u32 size, int align, bool zero, gpuobj->size = nvkm_memory_size(gpuobj->memory); } - return nvkm_mm_init(&gpuobj->heap, 0, gpuobj->size, 1); + return nvkm_mm_init(&gpuobj->heap, 0, 0, gpuobj->size, 1); } void @@ -196,7 +219,7 @@ nvkm_gpuobj_del(struct 
nvkm_gpuobj **pgpuobj) if (gpuobj->parent) nvkm_mm_free(&gpuobj->parent->heap, &gpuobj->node); nvkm_mm_fini(&gpuobj->heap); - nvkm_memory_del(&gpuobj->memory); + nvkm_memory_unref(&gpuobj->memory); kfree(*pgpuobj); *pgpuobj = NULL; } @@ -218,26 +241,6 @@ nvkm_gpuobj_new(struct nvkm_device *device, u32 size, int align, bool zero, return ret; } -int -nvkm_gpuobj_map(struct nvkm_gpuobj *gpuobj, struct nvkm_vm *vm, - u32 access, struct nvkm_vma *vma) -{ - struct nvkm_memory *memory = gpuobj->memory; - int ret = nvkm_vm_get(vm, gpuobj->size, 12, access, vma); - if (ret == 0) - nvkm_memory_map(memory, vma, 0); - return ret; -} - -void -nvkm_gpuobj_unmap(struct nvkm_vma *vma) -{ - if (vma->node) { - nvkm_vm_unmap(vma); - nvkm_vm_put(vma); - } -} - /* the below is basically only here to support sharing the paged dma object * for PCI(E)GART on <=nv4x chipsets, and should *not* be expected to work * anywhere else. diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c index be19bbe56bba..d777df5a64e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c @@ -53,7 +53,7 @@ nvkm_ioctl_sclass(struct nvkm_client *client, union { struct nvif_ioctl_sclass_v0 v0; } *args = data; - struct nvkm_oclass oclass; + struct nvkm_oclass oclass = { .client = client }; int ret = -ENOSYS, i = 0; nvif_ioctl(object, "sclass size %d\n", size); @@ -257,13 +257,19 @@ nvkm_ioctl_map(struct nvkm_client *client, union { struct nvif_ioctl_map_v0 v0; } *args = data; + enum nvkm_object_map type; int ret = -ENOSYS; nvif_ioctl(object, "map size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { nvif_ioctl(object, "map vers %d\n", args->v0.version); - ret = nvkm_object_map(object, &args->v0.handle, - &args->v0.length); + ret = nvkm_object_map(object, data, size, &type, + &args->v0.handle, + &args->v0.length); + if (type == NVKM_OBJECT_MAP_IO) + args->v0.type = NVIF_IOCTL_MAP_V0_IO; + else + args->v0.type = NVIF_IOCTL_MAP_V0_VA; } return ret; @@ -281,6 +287,7 @@ nvkm_ioctl_unmap(struct nvkm_client *client, nvif_ioctl(object, "unmap size %d\n", size); if (!(ret = nvif_unvers(ret, &data, &size, args->none))) { nvif_ioctl(object, "unmap\n"); + ret = nvkm_object_unmap(object); } return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/core/memory.c b/drivers/gpu/drm/nouveau/nvkm/core/memory.c index 8903c04c977e..e85a08ecd9da 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/memory.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/memory.c @@ -22,27 +22,117 @@ * Authors: Ben Skeggs <bskeggs@redhat.com> */ #include <core/memory.h> +#include <core/mm.h> +#include <subdev/fb.h> #include <subdev/instmem.h> void +nvkm_memory_tags_put(struct nvkm_memory *memory, struct nvkm_device *device, + struct nvkm_tags **ptags) +{ + struct nvkm_fb *fb = device->fb; + struct nvkm_tags *tags = *ptags; + if (tags) { + mutex_lock(&fb->subdev.mutex); + if (refcount_dec_and_test(&tags->refcount)) { + nvkm_mm_free(&fb->tags, &tags->mn); + kfree(memory->tags); + memory->tags = NULL; + } + mutex_unlock(&fb->subdev.mutex); + *ptags = NULL; + } +} + +int +nvkm_memory_tags_get(struct nvkm_memory *memory, struct nvkm_device *device, + u32 nr, void (*clr)(struct nvkm_device *, u32, u32), + struct nvkm_tags **ptags) +{ + struct nvkm_fb *fb = device->fb; + struct nvkm_tags *tags; + + mutex_lock(&fb->subdev.mutex); + if ((tags = memory->tags)) { + /* If comptags exist for the memory, but a 
different amount + * than requested, the buffer is being mapped with settings + * that are incompatible with existing mappings. + */ + if (tags->mn && tags->mn->length != nr) { + mutex_unlock(&fb->subdev.mutex); + return -EINVAL; + } + + refcount_inc(&tags->refcount); + mutex_unlock(&fb->subdev.mutex); + *ptags = tags; + return 0; + } + + if (!(tags = kmalloc(sizeof(*tags), GFP_KERNEL))) { + mutex_unlock(&fb->subdev.mutex); + return -ENOMEM; + } + + if (!nvkm_mm_head(&fb->tags, 0, 1, nr, nr, 1, &tags->mn)) { + if (clr) + clr(device, tags->mn->offset, tags->mn->length); + } else { + /* Failure to allocate HW comptags is not an error, the + * caller should fall back to an uncompressed map. + * + * As memory can be mapped in multiple places, we still + * need to track the allocation failure and ensure that + * any additional mappings remain uncompressed. + * + * This is handled by returning an empty nvkm_tags. + */ + tags->mn = NULL; + } + + refcount_set(&tags->refcount, 1); + mutex_unlock(&fb->subdev.mutex); + *ptags = tags; + return 0; +} + +void nvkm_memory_ctor(const struct nvkm_memory_func *func, struct nvkm_memory *memory) { memory->func = func; + kref_init(&memory->kref); +} + +static void +nvkm_memory_del(struct kref *kref) +{ + struct nvkm_memory *memory = container_of(kref, typeof(*memory), kref); + if (!WARN_ON(!memory->func)) { + if (memory->func->dtor) + memory = memory->func->dtor(memory); + kfree(memory); + } } void -nvkm_memory_del(struct nvkm_memory **pmemory) +nvkm_memory_unref(struct nvkm_memory **pmemory) { struct nvkm_memory *memory = *pmemory; - if (memory && !WARN_ON(!memory->func)) { - if (memory->func->dtor) - *pmemory = memory->func->dtor(memory); - kfree(*pmemory); + if (memory) { + kref_put(&memory->kref, nvkm_memory_del); *pmemory = NULL; } } +struct nvkm_memory * +nvkm_memory_ref(struct nvkm_memory *memory) +{ + if (memory) + kref_get(&memory->kref); + return memory; +} + int nvkm_memory_new(struct nvkm_device *device, enum nvkm_memory_target target, u64 size, u32 align, bool zero, diff --git a/drivers/gpu/drm/nouveau/nvkm/core/mm.c b/drivers/gpu/drm/nouveau/nvkm/core/mm.c index 5c7891234eea..f78a06a6b2f1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/mm.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/mm.c @@ -237,7 +237,7 @@ nvkm_mm_tail(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min, } int -nvkm_mm_init(struct nvkm_mm *mm, u32 offset, u32 length, u32 block) +nvkm_mm_init(struct nvkm_mm *mm, u8 heap, u32 offset, u32 length, u32 block) { struct nvkm_mm_node *node, *prev; u32 next; @@ -274,7 +274,8 @@ nvkm_mm_init(struct nvkm_mm *mm, u32 offset, u32 length, u32 block) list_add_tail(&node->nl_entry, &mm->nodes); list_add_tail(&node->fl_entry, &mm->free); - node->heap = ++mm->heap_nodes; + node->heap = heap; + mm->heap_nodes++; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c index acd76fd4f6d8..301a5e5b5f7f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/object.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c @@ -102,10 +102,19 @@ nvkm_object_ntfy(struct nvkm_object *object, u32 mthd, } int -nvkm_object_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_object_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { if (likely(object->func->map)) - return object->func->map(object, addr, size); + return object->func->map(object, argv, argc, type, addr, size); + return -ENODEV; +} + +int +nvkm_object_unmap(struct nvkm_object 
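nvkm_memory_tags_get()/nvkm_memory_tags_put() above share a single refcounted comptag allocation between all mappings of a memory object, and deliberately hand back an empty nvkm_tags (tags->mn == NULL) when the hardware allocation fails so callers can fall back to an uncompressed map. A sketch of the intended calling pattern; the example_ helper and the action taken in each branch are assumptions:

/* Hypothetical user of the comptag helpers: ask for "nr" comptags and
 * pick a compressed or uncompressed mapping depending on the result. */
static int
example_comp_map(struct nvkm_memory *memory, struct nvkm_device *device,
		 u32 nr)
{
	struct nvkm_tags *tags;
	int ret = nvkm_memory_tags_get(memory, device, nr, NULL, &tags);
	if (ret)
		return ret;	/* e.g. -EINVAL: conflicts with existing tags */

	if (tags->mn) {
		/* compressed: program tags->mn->offset / tags->mn->length */
	} else {
		/* HW comptag allocation failed earlier: map uncompressed */
	}

	/* ... on teardown of the mapping ... */
	nvkm_memory_tags_put(memory, device, &tags);
	return 0;
}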
*object) +{ + if (likely(object->func->unmap)) + return object->func->unmap(object); return -ENODEV; } @@ -259,6 +268,7 @@ nvkm_object_dtor(struct nvkm_object *object) } nvif_debug(object, "destroy running...\n"); + nvkm_object_unmap(object); if (object->func->dtor) data = object->func->dtor(object); nvkm_engine_unref(&object->engine); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c b/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c index e31a0479add0..16299837a296 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c @@ -37,9 +37,17 @@ nvkm_oproxy_ntfy(struct nvkm_object *object, u32 mthd, } static int -nvkm_oproxy_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_oproxy_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { - return nvkm_object_map(nvkm_oproxy(object)->object, addr, size); + struct nvkm_oproxy *oproxy = nvkm_oproxy(object); + return nvkm_object_map(oproxy->object, argv, argc, type, addr, size); +} + +static int +nvkm_oproxy_unmap(struct nvkm_object *object) +{ + return nvkm_object_unmap(nvkm_oproxy(object)->object); } static int @@ -171,6 +179,7 @@ nvkm_oproxy_func = { .mthd = nvkm_oproxy_mthd, .ntfy = nvkm_oproxy_ntfy, .map = nvkm_oproxy_map, + .unmap = nvkm_oproxy_unmap, .rd08 = nvkm_oproxy_rd08, .rd16 = nvkm_oproxy_rd16, .rd32 = nvkm_oproxy_rd32, diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c index 89da47234016..ccba4ae73cc5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c @@ -21,6 +21,7 @@ */ #include <core/ramht.h> #include <core/engine.h> +#include <core/object.h> static u32 nvkm_ramht_hash(struct nvkm_ramht *ramht, int chid, u32 handle) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c index 8e2e24a74774..44e116f7880d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c @@ -39,5 +39,5 @@ int g84_bsp_new(struct nvkm_device *device, int index, struct nvkm_engine **pengine) { return nvkm_xtensa_new_(&g84_bsp, device, index, - true, 0x103000, pengine); + device->chipset != 0x92, 0x103000, pengine); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h index d9ca9636a3e3..da130f5058e5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gf100_ce_data[] = { /* 0x0000: ctx_object */ 0x00000000, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h index f0a1cf31c7ca..0b92eb32598d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gt215_ce_data[] = { /* 0x0000: ctx_object */ 0x00000000, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h index 2dce405976ad..0e3d08f11b0b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CE_PRIV_H__ #define __NVKM_CE_PRIV_H__ #include <engine/ce.h> diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.h index 1bbe76e0740a..6a62021e9861 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVICE_ACPI_H__ #define __NVKM_DEVICE_ACPI_H__ #include <core/os.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e096a5d9c292..e14643615698 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -927,7 +927,7 @@ nv84_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g84_pci_new, .therm = g84_therm_new, @@ -959,7 +959,7 @@ nv86_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g84_pci_new, .therm = g84_therm_new, @@ -991,7 +991,7 @@ nv92_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g92_pci_new, .therm = g84_therm_new, @@ -1023,7 +1023,7 @@ nv94_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1055,7 +1055,7 @@ nv96_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1087,7 +1087,7 @@ nv98_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1119,7 +1119,7 @@ nva0_chipset = { .i2c = nv50_i2c_new, .imem = nv50_instmem_new, .mc = g84_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1151,7 +1151,7 @@ nva3_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1185,7 +1185,7 @@ nva5_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1218,7 +1218,7 @@ nva8_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1251,7 +1251,7 @@ nvaa_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1283,7 +1283,7 @@ nvac_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1315,7 +1315,7 @@ nvaf_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = gt215_mc_new, - .mmu = nv50_mmu_new, + .mmu = g84_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .pmu = gt215_pmu_new, @@ -1678,7 +1678,7 @@ nve4_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = 
gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk104_pmu_new, @@ -1717,7 +1717,7 @@ nve6_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk104_pmu_new, @@ -1756,7 +1756,7 @@ nve7_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk104_pmu_new, @@ -1790,7 +1790,7 @@ nvea_chipset = { .imem = gk20a_instmem_new, .ltc = gk104_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk20a_mmu_new, .pmu = gk20a_pmu_new, .timer = gk20a_timer_new, .top = gk104_top_new, @@ -1820,7 +1820,7 @@ nvf0_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk110_pmu_new, @@ -1858,7 +1858,7 @@ nvf1_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk104_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk110_pmu_new, @@ -1896,7 +1896,7 @@ nv106_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk208_pmu_new, @@ -1934,7 +1934,7 @@ nv108_chipset = { .imem = nv50_instmem_new, .ltc = gk104_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gk208_pmu_new, @@ -1958,7 +1958,7 @@ nv108_chipset = { static const struct nvkm_device_chip nv117_chipset = { .name = "GM107", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .clk = gk104_clk_new, @@ -1972,7 +1972,7 @@ nv117_chipset = { .imem = nv50_instmem_new, .ltc = gm107_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -1992,7 +1992,7 @@ nv117_chipset = { static const struct nvkm_device_chip nv118_chipset = { .name = "GM108", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .clk = gk104_clk_new, @@ -2006,7 +2006,7 @@ nv118_chipset = { .imem = nv50_instmem_new, .ltc = gm107_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gk104_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2026,7 +2026,7 @@ nv118_chipset = { static const struct nvkm_device_chip nv120_chipset = { .name = "GM200", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2039,7 +2039,7 @@ nv120_chipset = { .imem = nv50_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm200_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2061,7 +2061,7 @@ nv120_chipset = { static const struct nvkm_device_chip nv124_chipset = { .name = "GM204", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2074,7 +2074,7 @@ nv124_chipset = { .imem = nv50_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm200_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2096,7 +2096,7 @@ nv124_chipset = { static const struct nvkm_device_chip nv126_chipset = { 
.name = "GM206", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2109,7 +2109,7 @@ nv126_chipset = { .imem = nv50_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm200_mmu_new, .mxm = nv50_mxm_new, .pci = gk104_pci_new, .pmu = gm107_pmu_new, @@ -2131,7 +2131,7 @@ nv126_chipset = { static const struct nvkm_device_chip nv12b_chipset = { .name = "GM20B", - .bar = gk20a_bar_new, + .bar = gm20b_bar_new, .bus = gf100_bus_new, .clk = gm20b_clk_new, .fb = gm20b_fb_new, @@ -2140,7 +2140,7 @@ nv12b_chipset = { .imem = gk20a_instmem_new, .ltc = gm200_ltc_new, .mc = gk20a_mc_new, - .mmu = gf100_mmu_new, + .mmu = gm20b_mmu_new, .pmu = gm20b_pmu_new, .secboot = gm20b_secboot_new, .timer = gk20a_timer_new, @@ -2156,7 +2156,7 @@ nv12b_chipset = { static const struct nvkm_device_chip nv130_chipset = { .name = "GP100", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2168,7 +2168,8 @@ nv130_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gm200_secboot_new, .pci = gp100_pci_new, .pmu = gp100_pmu_new, @@ -2190,7 +2191,7 @@ nv130_chipset = { static const struct nvkm_device_chip nv132_chipset = { .name = "GP102", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2202,7 +2203,8 @@ nv132_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2224,7 +2226,7 @@ nv132_chipset = { static const struct nvkm_device_chip nv134_chipset = { .name = "GP104", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2236,7 +2238,8 @@ nv134_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2258,7 +2261,7 @@ nv134_chipset = { static const struct nvkm_device_chip nv136_chipset = { .name = "GP106", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2270,7 +2273,8 @@ nv136_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2292,7 +2296,7 @@ nv136_chipset = { static const struct nvkm_device_chip nv137_chipset = { .name = "GP107", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2304,7 +2308,8 @@ nv137_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .secboot = gp102_secboot_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2326,7 +2331,7 @@ nv137_chipset = { static const struct nvkm_device_chip nv138_chipset = { .name = "GP108", - .bar = gf100_bar_new, + .bar = gm107_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, @@ -2338,7 +2343,8 @@ 
nv138_chipset = { .imem = nv50_instmem_new, .ltc = gp100_ltc_new, .mc = gp100_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp100_mmu_new, + .therm = gp100_therm_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .timer = gk20a_timer_new, @@ -2355,7 +2361,7 @@ nv138_chipset = { static const struct nvkm_device_chip nv13b_chipset = { .name = "GP10B", - .bar = gk20a_bar_new, + .bar = gm20b_bar_new, .bus = gf100_bus_new, .fb = gp10b_fb_new, .fuse = gm107_fuse_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h index 20249d8e444d..ebcc5c52fbd1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h @@ -1,7 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVICE_CTRL_H__ #define __NVKM_DEVICE_CTRL_H__ #define nvkm_control(p) container_of((p), struct nvkm_control, object) -#include <core/device.h> +#include <core/object.h> struct nvkm_control { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index 74a1ffa425f7..f302d2b5782a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1627,7 +1627,7 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, const struct nvkm_device_pci_vendor *pciv; const char *name = NULL; struct nvkm_device_pci *pdev; - int ret; + int ret, bits; ret = pci_enable_device(pci_dev); if (ret) @@ -1679,17 +1679,17 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, if (ret) return ret; - /* - * Set a preliminary DMA mask based on the .dma_bits member of the - * MMU subdevice. This allows other subdevices to create DMA mappings - * in their init() or oneinit() methods, which may be called before the - * TTM layer sets the DMA mask definitively. - * This is necessary for platforms where the default DMA mask of 32 - * does not cover any system memory, i.e., when all RAM is > 4 GB. - */ - if (pdev->device.mmu) - dma_set_mask_and_coherent(&pci_dev->dev, - DMA_BIT_MASK(pdev->device.mmu->dma_bits)); + /* Set DMA mask based on capabilities reported by the MMU subdev. 
*/ + if (pdev->device.mmu && !pdev->device.pci->agp.bridge) + bits = pdev->device.mmu->dma_bits; + else + bits = 32; + + ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits)); + if (ret && bits != 32) { + dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + pdev->device.mmu->dma_bits = 32; + } return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h index 6c16f3835f44..08d0bf605722 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVICE_PRIV_H__ #define __NVKM_DEVICE_PRIV_H__ #include <core/device.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 189ed80e21ff..78597da6313a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -136,7 +136,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) if (ret) goto free_domain; - ret = nvkm_mm_init(&tdev->iommu.mm, 0, + ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, (1ULL << tdev->func->iommu_bit) >> tdev->iommu.pgshift, 1); if (ret) @@ -216,7 +216,7 @@ nvkm_device_tegra_fini(struct nvkm_device *device, bool suspend) if (tdev->irq) { free_irq(tdev->irq, tdev); tdev->irq = 0; - }; + } } static int @@ -309,8 +309,6 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, /** * The IOMMU bit defines the upper limit of the GPU-addressable space. - * This will be refined in nouveau_ttm_init but we need to do it early - * for instmem to behave properly */ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit)); if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 513ee6b79553..17adcb4e8854 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -206,10 +206,12 @@ nvkm_udevice_wr32(struct nvkm_object *object, u64 addr, u32 data) } static int -nvkm_udevice_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_udevice_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nvkm_udevice *udev = nvkm_udevice(object); struct nvkm_device *device = udev->device; + *type = NVKM_OBJECT_MAP_IO; *addr = device->func->resource_addr(device, 0); *size = device->func->resource_size(device, 0); return 0; @@ -292,6 +294,11 @@ nvkm_udevice_child_get(struct nvkm_object *object, int index, if (!sclass) { switch (index) { case 0: sclass = &nvkm_control_oclass; break; + case 1: + if (!device->mmu) + return -EINVAL; + sclass = &device->mmu->user; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 0c0310498afd..723dcbde2ac2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -191,11 +191,13 @@ nv50_disp_chan_ntfy(struct nvkm_object *object, u32 type, } static int -nv50_disp_chan_map(struct nvkm_object *object, u64 *addr, u32 *size) +nv50_disp_chan_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nv50_disp_chan *chan = nv50_disp_chan(object); struct nv50_disp *disp = chan->root->disp; struct nvkm_device *device = 
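The PCI probe change above first tries the DMA mask advertised by the MMU subdev (falling back to 32 bits up front for AGP bridges) and retreats to a 32-bit mask if the wider one is rejected. The same fallback pattern in isolation, as a hedged sketch with an assumed device pointer and helper name:

#include <linux/dma-mapping.h>

/* Illustrative mask-with-fallback helper mirroring the probe logic:
 * try the wide mask first, then settle for 32 bits. */
static int
example_set_dma_mask(struct device *dev, int bits)
{
	int ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(bits));
	if (ret && bits != 32)
		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
	return ret;
}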
disp->base.engine.subdev.device; + *type = NVKM_OBJECT_MAP_IO; *addr = device->func->resource_addr(device, 0) + 0x640000 + (chan->chid.user * 0x1000); *size = 0x001000; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h index 737b38f6fbd2..40681db91a02 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_DISP_CHAN_H__ #define __NV50_DISP_CHAN_H__ #define nv50_disp_chan(p) container_of((p), struct nv50_disp_chan, object) +#include <core/object.h> #include "nv50.h" struct nv50_disp_chan { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h index de962b7b026d..090e869ae612 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_CONN_H__ #define __NVKM_DISP_CONN_H__ #include <engine/disp.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h index ea4a0d062e31..f9b98211da6a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_DISP_DMAC_H__ #define __NV50_DISP_DMAC_H__ #define nv50_disp_dmac(p) container_of((p), struct nv50_disp_dmac, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h index 59173c290525..495f665a0ee6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_DP_H__ #define __NVKM_DISP_DP_H__ #define nvkm_dp(p) container_of((p), struct nvkm_dp, outp) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.c index e82c68f18444..d131cca999dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "hdmi.h" void pack_hdmi_infoframe(struct packed_hdmi_infoframe *packed_frame, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.h index 528f5621a496..45094c6e1425 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_HDMI_H__ #define __NVKM_DISP_HDMI_H__ #include "ior.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h index b04c49d2eeeb..57030b3a4a75 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_HEAD_H__ #define __NVKM_DISP_HEAD_H__ #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index a1e8bf48b778..4548c031b937 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_IOR_H__ #define __NVKM_DISP_IOR_H__ #include "priv.h" @@ 
-147,7 +148,7 @@ void gf119_hda_eld(struct nvkm_ior *, u8 *, u8); #define IOR_MSG(i,l,f,a...) do { \ struct nvkm_ior *_ior = (i); \ - nvkm_##l(&_ior->disp->engine.subdev, "%s: "f, _ior->name, ##a); \ + nvkm_##l(&_ior->disp->engine.subdev, "%s: "f"\n", _ior->name, ##a); \ } while(0) #define IOR_WARN(i,f,a...) IOR_MSG((i), warn, f, ##a) #define IOR_DBG(i,f,a...) IOR_MSG((i), debug, f, ##a) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index 6ea19466f436..eb0b8acb1c5b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_DISP_H__ #define __NV50_DISP_H__ #define nv50_disp(p) container_of((p), struct nv50_disp, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h index 146d101d4891..ea84d7d5741a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_OUTP_H__ #define __NVKM_DISP_OUTP_H__ #include <engine/disp.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h index 5772f0094129..6c9bfff6d043 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DISP_PRIV_H__ #define __NVKM_DISP_PRIV_H__ #include <engine/disp.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h index b147cf5b3518..4818fa69ae6c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_DISP_ROOT_H__ #define __NV50_DISP_ROOT_H__ #define nv50_disp_root(p) container_of((p), struct nv50_disp_root, object) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/dma/priv.h index deb37ee55c0b..4307cbecd5c5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DMA_PRIV_H__ #define __NVKM_DMA_PRIV_H__ #define nvkm_dma(p) container_of((p), struct nvkm_dma, engine) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h index 69a7f1034024..4bbac8a21c71 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DMA_USER_H__ #define __NVKM_DMA_USER_H__ #define nvkm_dmaobj(p) container_of((p), struct nvkm_dmaobj, object) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c index c95942ef8216..49ef7e57aad4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c @@ -26,7 +26,7 @@ #include <core/gpuobj.h> #include <subdev/fb.h> -#include <subdev/mmu/nv04.h> +#include <subdev/mmu/vmm.h> #include <nvif/class.h> @@ -49,8 +49,8 @@ nv04_dmaobj_bind(struct nvkm_dmaobj *base, struct nvkm_gpuobj *parent, int ret; if (dmaobj->clone) { - struct nv04_mmu *mmu = nv04_mmu(device->mmu); - struct nvkm_memory 
*pgt = mmu->vm->pgt[0].mem[0]; + struct nvkm_memory *pgt = + device->mmu->vmm->pd->pt[0]->memory; if (!dmaobj->base.start) return nvkm_gpuobj_wrap(pgt, pgpuobj); nvkm_kmap(pgt); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c index 2e7b4e2105ef..816ccaedfc73 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c @@ -99,7 +99,7 @@ nvkm_falcon_fini(struct nvkm_engine *engine, bool suspend) const u32 base = falcon->addr; if (!suspend) { - nvkm_memory_del(&falcon->core); + nvkm_memory_unref(&falcon->core); if (falcon->external) { vfree(falcon->data.data); vfree(falcon->code.data); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c index 660ca7aa95ea..64f6b7654a08 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c @@ -27,6 +27,7 @@ #include <core/client.h> #include <core/gpuobj.h> #include <core/notify.h> +#include <subdev/mc.h> #include <nvif/event.h> #include <nvif/unpack.h> @@ -278,6 +279,12 @@ nvkm_fifo_oneinit(struct nvkm_engine *engine) return 0; } +static void +nvkm_fifo_preinit(struct nvkm_engine *engine) +{ + nvkm_mc_reset(engine->subdev.device, NVKM_ENGINE_FIFO); +} + static int nvkm_fifo_init(struct nvkm_engine *engine) { @@ -302,6 +309,7 @@ nvkm_fifo_dtor(struct nvkm_engine *engine) static const struct nvkm_engine_func nvkm_fifo = { .dtor = nvkm_fifo_dtor, + .preinit = nvkm_fifo_preinit, .oneinit = nvkm_fifo_oneinit, .init = nvkm_fifo_init, .fini = nvkm_fifo_fini, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c index fab760ae922f..d83485385934 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c @@ -117,8 +117,8 @@ nvkm_fifo_chan_child_del(struct nvkm_oproxy *base) if (chan->func->engine_dtor) chan->func->engine_dtor(chan, engine); nvkm_object_del(&engn->object); - if (chan->vm) - atomic_dec(&chan->vm->engref[engine->subdev.index]); + if (chan->vmm) + atomic_dec(&chan->vmm->engref[engine->subdev.index]); } } @@ -151,8 +151,8 @@ nvkm_fifo_chan_child_new(const struct nvkm_oclass *oclass, void *data, u32 size, .engine = oclass->engine, }; - if (chan->vm) - atomic_inc(&chan->vm->engref[engine->subdev.index]); + if (chan->vmm) + atomic_inc(&chan->vmm->engref[engine->subdev.index]); if (engine->func->fifo.cclass) { ret = engine->func->fifo.cclass(chan, &cclass, @@ -253,9 +253,11 @@ nvkm_fifo_chan_ntfy(struct nvkm_object *object, u32 type, } static int -nvkm_fifo_chan_map(struct nvkm_object *object, u64 *addr, u32 *size) +nvkm_fifo_chan_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); + *type = NVKM_OBJECT_MAP_IO; *addr = chan->addr; *size = chan->size; return 0; @@ -325,7 +327,10 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object) if (chan->user) iounmap(chan->user); - nvkm_vm_ref(NULL, &chan->vm, NULL); + if (chan->vmm) { + nvkm_vmm_part(chan->vmm, chan->inst->memory); + nvkm_vmm_unref(&chan->vmm); + } nvkm_gpuobj_del(&chan->push); nvkm_gpuobj_del(&chan->inst); @@ -347,13 +352,12 @@ nvkm_fifo_chan_func = { int nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func, struct nvkm_fifo *fifo, u32 size, u32 align, bool zero, - u64 vm, u64 push, u64 engines, int bar, u32 base, u32 user, - const struct nvkm_oclass *oclass, + 
u64 hvmm, u64 push, u64 engines, int bar, u32 base, + u32 user, const struct nvkm_oclass *oclass, struct nvkm_fifo_chan *chan) { struct nvkm_client *client = oclass->client; struct nvkm_device *device = fifo->engine.subdev.device; - struct nvkm_mmu *mmu = device->mmu; struct nvkm_dmaobj *dmaobj; unsigned long flags; int ret; @@ -382,16 +386,19 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func, } /* channel address space */ - if (!vm && mmu) { - if (!client->vm || client->vm->mmu == mmu) { - ret = nvkm_vm_ref(client->vm, &chan->vm, NULL); - if (ret) - return ret; - } else { + if (hvmm) { + struct nvkm_vmm *vmm = nvkm_uvmm_search(client, hvmm); + if (IS_ERR(vmm)) + return PTR_ERR(vmm); + + if (vmm->mmu != device->mmu) return -EINVAL; - } - } else { - return -ENOENT; + + ret = nvkm_vmm_join(vmm, chan->inst->memory); + if (ret) + return ret; + + chan->vmm = nvkm_vmm_ref(vmm); } /* allocate channel id */ diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h index d8019bdacd61..3ffef236189e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FIFO_CHAN_H__ #define __NVKM_FIFO_CHAN_H__ #define nvkm_fifo_chan(p) container_of((p), struct nvkm_fifo_chan, object) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c index 61797c4dd07a..a5c998fe4485 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c @@ -229,15 +229,18 @@ g84_fifo_chan_func = { }; int -g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, +g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push, const struct nvkm_oclass *oclass, struct nv50_fifo_chan *chan) { struct nvkm_device *device = fifo->base.engine.subdev.device; int ret; + if (!vmm) + return -EINVAL; + ret = nvkm_fifo_chan_ctor(&g84_fifo_chan_func, &fifo->base, - 0x10000, 0x1000, false, vm, push, + 0x10000, 0x1000, false, vmm, push, (1ULL << NVKM_ENGINE_BSP) | (1ULL << NVKM_ENGINE_CE0) | (1ULL << NVKM_ENGINE_CIPHER) | @@ -277,9 +280,5 @@ g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, if (ret) return ret; - ret = nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); - if (ret) - return ret; - - return nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); + return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h index 7d697e2dce1a..b653664e081b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __GF100_FIFO_CHAN_H__ #define __GF100_FIFO_CHAN_H__ #define gf100_fifo_chan(p) container_of((p), struct gf100_fifo_chan, base) @@ -11,12 +12,9 @@ struct gf100_fifo_chan { struct list_head head; bool killed; - struct nvkm_gpuobj *pgd; - struct nvkm_vm *vm; - struct { struct nvkm_gpuobj *inst; - struct nvkm_vma vma; + struct nvkm_vma *vma; } engn[NVKM_SUBDEV_NR]; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h index 230f64e5f731..1208e3d9dbe2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h @@ -1,3 +1,4 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __GK104_FIFO_CHAN_H__ #define __GK104_FIFO_CHAN_H__ #define gk104_fifo_chan(p) container_of((p), struct gk104_fifo_chan, base) @@ -12,12 +13,9 @@ struct gk104_fifo_chan { struct list_head head; bool killed; - struct nvkm_gpuobj *pgd; - struct nvkm_vm *vm; - struct { struct nvkm_gpuobj *inst; - struct nvkm_vma vma; + struct nvkm_vma *vma; } engn[NVKM_SUBDEV_NR]; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h index 3361a1fd0343..15b06bdf5067 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV04_FIFO_CHAN_H__ #define __NV04_FIFO_CHAN_H__ #define nv04_fifo_chan(p) container_of((p), struct nv04_fifo_chan, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c index 25b60aff40e4..85f7dbf53c99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c @@ -206,7 +206,6 @@ void * nv50_fifo_chan_dtor(struct nvkm_fifo_chan *base) { struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - nvkm_vm_ref(NULL, &chan->vm, chan->pgd); nvkm_ramht_del(&chan->ramht); nvkm_gpuobj_del(&chan->pgd); nvkm_gpuobj_del(&chan->eng); @@ -229,15 +228,18 @@ nv50_fifo_chan_func = { }; int -nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, +nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push, const struct nvkm_oclass *oclass, struct nv50_fifo_chan *chan) { struct nvkm_device *device = fifo->base.engine.subdev.device; int ret; + if (!vmm) + return -EINVAL; + ret = nvkm_fifo_chan_ctor(&nv50_fifo_chan_func, &fifo->base, - 0x10000, 0x1000, false, vm, push, + 0x10000, 0x1000, false, vmm, push, (1ULL << NVKM_ENGINE_DMAOBJ) | (1ULL << NVKM_ENGINE_SW) | (1ULL << NVKM_ENGINE_GR) | @@ -262,9 +264,5 @@ nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vm, u64 push, if (ret) return ret; - ret = nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); - if (ret) - return ret; - - return nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); + return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h index 4b9da469b704..2e3c4005b874 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_FIFO_CHAN_H__ #define __NV50_FIFO_CHAN_H__ #define nv50_fifo_chan(p) container_of((p), struct nv50_fifo_chan, base) @@ -13,19 +14,18 @@ struct nv50_fifo_chan { struct nvkm_gpuobj *eng; struct nvkm_gpuobj *pgd; struct nvkm_ramht *ramht; - struct nvkm_vm *vm; struct nvkm_gpuobj *engn[NVKM_SUBDEV_NR]; }; -int nv50_fifo_chan_ctor(struct nv50_fifo *, u64 vm, u64 push, +int nv50_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push, const struct nvkm_oclass *, struct nv50_fifo_chan *); void *nv50_fifo_chan_dtor(struct nvkm_fifo_chan *); void nv50_fifo_chan_fini(struct nvkm_fifo_chan *); void nv50_fifo_chan_engine_dtor(struct nvkm_fifo_chan *, struct nvkm_engine *); void nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *, int); -int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vm, u64 push, +int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push, const struct nvkm_oclass *, 
struct nv50_fifo_chan *); extern const struct nvkm_fifo_chan_oclass nv50_fifo_dma_oclass; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c index caa914074752..fc34cddcd2f5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c @@ -44,9 +44,9 @@ g84_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel dma size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d vm %llx " + nvif_ioctl(parent, "create channel dma vers %d vmm %llx " "pushbuf %llx offset %016llx\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.offset); if (!args->v0.pushbuf) return -EINVAL; @@ -57,7 +57,7 @@ g84_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = g84_fifo_chan_ctor(fifo, args->v0.vm, args->v0.pushbuf, + ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c index 0a7b6ed5ed28..c213122cf088 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c @@ -95,6 +95,7 @@ nv04_fifo_dma_fini(struct nvkm_fifo_chan *base) nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0); c = fifo->ramfc; + nvkm_kmap(fctx); do { u32 rm = ((1ULL << c->bits) - 1) << c->regs; u32 cm = ((1ULL << c->bits) - 1) << c->ctxs; @@ -102,6 +103,7 @@ nv04_fifo_dma_fini(struct nvkm_fifo_chan *base) u32 cv = (nvkm_ro32(fctx, c->ctxp + data) & ~cm); nvkm_wo32(fctx, c->ctxp + data, cv | (rv << c->ctxs)); } while ((++c)->bits); + nvkm_done(fctx); c = fifo->ramfc; do { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c index 480bc3777be5..8043718ad150 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c @@ -44,9 +44,9 @@ nv50_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel dma size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d vm %llx " + nvif_ioctl(parent, "create channel dma vers %d vmm %llx " "pushbuf %llx offset %016llx\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.offset); if (!args->v0.pushbuf) return -EINVAL; @@ -57,7 +57,7 @@ nv50_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = nv50_fifo_chan_ctor(fifo, args->v0.vm, args->v0.pushbuf, + ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c index cd468ab1db12..f69576868164 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c @@ -559,6 +559,7 @@ gf100_fifo_oneinit(struct nvkm_fifo *base) struct gf100_fifo *fifo = gf100_fifo(base); struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; + struct 
nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); int ret; /* Determine number of PBDMAs by checking valid enable bits. */ @@ -584,12 +585,12 @@ gf100_fifo_oneinit(struct nvkm_fifo *base) if (ret) return ret; - ret = nvkm_bar_umap(device->bar, 128 * 0x1000, 12, &fifo->user.bar); + ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem), + &fifo->user.bar); if (ret) return ret; - nvkm_memory_map(fifo->user.mem, &fifo->user.bar, 0); - return 0; + return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0); } static void @@ -628,7 +629,7 @@ gf100_fifo_init(struct nvkm_fifo *base) } nvkm_mask(device, 0x002200, 0x00000001, 0x00000001); - nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar.offset >> 12); + nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12); nvkm_wr32(device, 0x002100, 0xffffffff); nvkm_wr32(device, 0x002140, 0x7fffffff); @@ -639,10 +640,11 @@ static void * gf100_fifo_dtor(struct nvkm_fifo *base) { struct gf100_fifo *fifo = gf100_fifo(base); - nvkm_vm_put(&fifo->user.bar); - nvkm_memory_del(&fifo->user.mem); - nvkm_memory_del(&fifo->runlist.mem[0]); - nvkm_memory_del(&fifo->runlist.mem[1]); + struct nvkm_device *device = fifo->base.engine.subdev.device; + nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar); + nvkm_memory_unref(&fifo->user.mem); + nvkm_memory_unref(&fifo->runlist.mem[0]); + nvkm_memory_unref(&fifo->runlist.mem[1]); return fifo; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h index 70db58eab9c3..68f97ba03df6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __GF100_FIFO_H__ #define __GF100_FIFO_H__ #define gf100_fifo(p) container_of((p), struct gf100_fifo, base) @@ -26,7 +27,7 @@ struct gf100_fifo { struct { struct nvkm_memory *mem; - struct nvkm_vma bar; + struct nvkm_vma *bar; } user; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index a7e55c422501..84bd703dd897 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -771,6 +771,7 @@ gk104_fifo_oneinit(struct nvkm_fifo *base) struct gk104_fifo *fifo = gk104_fifo(base); struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; + struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); int engn, runl, pbid, ret, i, j; enum nvkm_devidx engidx; u32 *map; @@ -834,13 +835,12 @@ gk104_fifo_oneinit(struct nvkm_fifo *base) if (ret) return ret; - ret = nvkm_bar_umap(device->bar, fifo->base.nr * 0x200, 12, - &fifo->user.bar); + ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem), + &fifo->user.bar); if (ret) return ret; - nvkm_memory_map(fifo->user.mem, &fifo->user.bar, 0); - return 0; + return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0); } static void @@ -866,7 +866,7 @@ gk104_fifo_init(struct nvkm_fifo *base) nvkm_wr32(device, 0x04014c + (i * 0x2000), 0xffffffff); /* INTREN */ } - nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar.offset >> 12); + nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12); nvkm_wr32(device, 0x002100, 0xffffffff); nvkm_wr32(device, 0x002140, 0x7fffffff); @@ -876,14 +876,15 @@ static void * gk104_fifo_dtor(struct nvkm_fifo *base) { struct gk104_fifo *fifo = gk104_fifo(base); + struct nvkm_device *device = fifo->base.engine.subdev.device; int i; - 
nvkm_vm_put(&fifo->user.bar); - nvkm_memory_del(&fifo->user.mem); + nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar); + nvkm_memory_unref(&fifo->user.mem); for (i = 0; i < fifo->runlist_nr; i++) { - nvkm_memory_del(&fifo->runlist[i].mem[1]); - nvkm_memory_del(&fifo->runlist[i].mem[0]); + nvkm_memory_unref(&fifo->runlist[i].mem[1]); + nvkm_memory_unref(&fifo->runlist[i].mem[0]); } return fifo; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h index 44bff98d6725..1579785cf941 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __GK104_FIFO_H__ #define __GK104_FIFO_H__ #define gk104_fifo(p) container_of((p), struct gk104_fifo, base) @@ -37,7 +38,7 @@ struct gk104_fifo { struct { struct nvkm_memory *mem; - struct nvkm_vma bar; + struct nvkm_vma *bar; } user; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c index 77c2f2a28bf3..2121f517b1dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c @@ -45,10 +45,10 @@ g84_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "pushbuf %llx ioffset %016llx " "ilength %08x\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.ioffset, args->v0.ilength); if (!args->v0.pushbuf) return -EINVAL; @@ -59,7 +59,7 @@ g84_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = g84_fifo_chan_ctor(fifo, args->v0.vm, args->v0.pushbuf, + ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c index f9e0377d3d24..75f9632789b3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c @@ -111,7 +111,7 @@ gf100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, struct nvkm_gpuobj *inst = chan->base.inst; if (offset) { - u64 addr = chan->engn[engine->subdev.index].vma.offset; + u64 addr = chan->engn[engine->subdev.index].vma->addr; nvkm_kmap(inst); nvkm_wo32(inst, offset + 0x00, lower_32_bits(addr) | 4); nvkm_wo32(inst, offset + 0x04, upper_32_bits(addr)); @@ -126,7 +126,7 @@ gf100_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine) { struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - nvkm_gpuobj_unmap(&chan->engn[engine->subdev.index].vma); + nvkm_vmm_put(chan->base.vmm, &chan->engn[engine->subdev.index].vma); nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst); } @@ -146,8 +146,13 @@ gf100_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, if (ret) return ret; - return nvkm_gpuobj_map(chan->engn[engn].inst, chan->vm, - NV_MEM_ACCESS_RW, &chan->engn[engn].vma); + ret = nvkm_vmm_get(chan->base.vmm, 12, chan->engn[engn].inst->size, + &chan->engn[engn].vma); + if (ret) + return ret; + + return nvkm_memory_map(chan->engn[engn].inst, 0, chan->base.vmm, + 
chan->engn[engn].vma, NULL, 0); } static void @@ -190,10 +195,7 @@ gf100_fifo_gpfifo_init(struct nvkm_fifo_chan *base) static void * gf100_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) { - struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - nvkm_vm_ref(NULL, &chan->vm, chan->pgd); - nvkm_gpuobj_del(&chan->pgd); - return chan; + return gf100_fifo_chan(base); } static const struct nvkm_fifo_chan_func @@ -216,7 +218,6 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, struct fermi_channel_gpfifo_v0 v0; } *args = data; struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; struct nvkm_object *parent = oclass->parent; struct gf100_fifo_chan *chan; u64 usermem, ioffset, ilength; @@ -224,10 +225,12 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "ioffset %016llx ilength %08x\n", - args->v0.version, args->v0.vm, args->v0.ioffset, + args->v0.version, args->v0.vmm, args->v0.ioffset, args->v0.ilength); + if (!args->v0.vmm) + return -EINVAL; } else return ret; @@ -239,7 +242,7 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, INIT_LIST_HEAD(&chan->head); ret = nvkm_fifo_chan_ctor(&gf100_fifo_gpfifo_func, &fifo->base, - 0x1000, 0x1000, true, args->v0.vm, 0, + 0x1000, 0x1000, true, args->v0.vmm, 0, (1ULL << NVKM_ENGINE_CE0) | (1ULL << NVKM_ENGINE_CE1) | (1ULL << NVKM_ENGINE_GR) | @@ -247,29 +250,13 @@ gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, (1ULL << NVKM_ENGINE_MSPPP) | (1ULL << NVKM_ENGINE_MSVLD) | (1ULL << NVKM_ENGINE_SW), - 1, fifo->user.bar.offset, 0x1000, + 1, fifo->user.bar->addr, 0x1000, oclass, &chan->base); if (ret) return ret; args->v0.chid = chan->base.chid; - /* page directory */ - ret = nvkm_gpuobj_new(device, 0x10000, 0x1000, false, NULL, &chan->pgd); - if (ret) - return ret; - - nvkm_kmap(chan->base.inst); - nvkm_wo32(chan->base.inst, 0x0200, lower_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0204, upper_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0208, 0xffffffff); - nvkm_wo32(chan->base.inst, 0x020c, 0x000000ff); - nvkm_done(chan->base.inst); - - ret = nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); - if (ret) - return ret; - /* clear channel control registers */ usermem = chan->base.chid * 0x1000; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index 8abf6f8ef445..80c87521bebe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -117,7 +117,7 @@ gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, u32 offset = gk104_fifo_gpfifo_engine_addr(engine); if (offset) { - u64 addr = chan->engn[engine->subdev.index].vma.offset; + u64 addr = chan->engn[engine->subdev.index].vma->addr; u32 datalo = lower_32_bits(addr) | 0x00000004; u32 datahi = upper_32_bits(addr); nvkm_kmap(inst); @@ -138,7 +138,7 @@ gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - nvkm_gpuobj_unmap(&chan->engn[engine->subdev.index].vma); + nvkm_vmm_put(chan->base.vmm, 
&chan->engn[engine->subdev.index].vma); nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst); } @@ -158,8 +158,13 @@ gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, if (ret) return ret; - return nvkm_gpuobj_map(chan->engn[engn].inst, chan->vm, - NV_MEM_ACCESS_RW, &chan->engn[engn].vma); + ret = nvkm_vmm_get(chan->base.vmm, 12, chan->engn[engn].inst->size, + &chan->engn[engn].vma); + if (ret) + return ret; + + return nvkm_memory_map(chan->engn[engn].inst, 0, chan->base.vmm, + chan->engn[engn].vma, NULL, 0); } static void @@ -203,10 +208,7 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base) static void * gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) { - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - nvkm_vm_ref(NULL, &chan->vm, chan->pgd); - nvkm_gpuobj_del(&chan->pgd); - return chan; + return gk104_fifo_chan(base); } static const struct nvkm_fifo_chan_func @@ -229,17 +231,19 @@ struct gk104_fifo_chan_func { static int gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, struct gk104_fifo *fifo, u32 *engmask, u16 *chid, - u64 vm, u64 ioffset, u64 ilength, + u64 vmm, u64 ioffset, u64 ilength, const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { - struct nvkm_device *device = fifo->base.engine.subdev.device; struct gk104_fifo_chan *chan; int runlist = -1, ret = -ENOSYS, i, j; u32 engines = 0, present = 0; u64 subdevs = 0; u64 usermem; + if (!vmm) + return -EINVAL; + /* Determine which downstream engines are present */ for (i = 0; i < fifo->engine_nr; i++) { struct nvkm_engine *engine = fifo->engine[i].engine; @@ -285,30 +289,14 @@ gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, INIT_LIST_HEAD(&chan->head); ret = nvkm_fifo_chan_ctor(&gk104_fifo_gpfifo_func, &fifo->base, - 0x1000, 0x1000, true, vm, 0, subdevs, - 1, fifo->user.bar.offset, 0x200, + 0x1000, 0x1000, true, vmm, 0, subdevs, + 1, fifo->user.bar->addr, 0x200, oclass, &chan->base); if (ret) return ret; *chid = chan->base.chid; - /* Page directory. */ - ret = nvkm_gpuobj_new(device, 0x10000, 0x1000, false, NULL, &chan->pgd); - if (ret) - return ret; - - nvkm_kmap(chan->base.inst); - nvkm_wo32(chan->base.inst, 0x0200, lower_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0204, upper_32_bits(chan->pgd->addr)); - nvkm_wo32(chan->base.inst, 0x0208, 0xffffffff); - nvkm_wo32(chan->base.inst, 0x020c, 0x000000ff); - nvkm_done(chan->base.inst); - - ret = nvkm_vm_ref(chan->base.vm, &chan->vm, chan->pgd); - if (ret) - return ret; - /* Clear channel control registers. 
*/ usermem = chan->base.chid * 0x200; ilength = order_base_2(ilength / 8); @@ -373,18 +361,17 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "ioffset %016llx ilength %08x engine %08x\n", - args->v0.version, args->v0.vm, args->v0.ioffset, + args->v0.version, args->v0.vmm, args->v0.ioffset, args->v0.ilength, args->v0.engines); return gk104_fifo_gpfifo_new_(gk104_fifo_gpfifo, fifo, &args->v0.engines, &args->v0.chid, - args->v0.vm, + args->v0.vmm, args->v0.ioffset, args->v0.ilength, oclass, pobject); - } return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c index c5a7de9db259..d8f28ec1e4a8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c @@ -45,10 +45,10 @@ nv50_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx " + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " "pushbuf %llx ioffset %016llx " "ilength %08x\n", - args->v0.version, args->v0.vm, args->v0.pushbuf, + args->v0.version, args->v0.vmm, args->v0.pushbuf, args->v0.ioffset, args->v0.ilength); if (!args->v0.pushbuf) return -EINVAL; @@ -59,7 +59,7 @@ nv50_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return -ENOMEM; *pobject = &chan->base.object; - ret = nv50_fifo_chan_ctor(fifo, args->v0.vm, args->v0.pushbuf, + ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, oclass, chan); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h index 03f60004bf7c..1d70542553cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV04_FIFO_H__ #define __NV04_FIFO_H__ #define nv04_fifo(p) container_of((p), struct nv04_fifo, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c index 66eb12c2b5ba..fa6e094d8068 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c @@ -100,8 +100,8 @@ void * nv50_fifo_dtor(struct nvkm_fifo *base) { struct nv50_fifo *fifo = nv50_fifo(base); - nvkm_memory_del(&fifo->runlist[1]); - nvkm_memory_del(&fifo->runlist[0]); + nvkm_memory_unref(&fifo->runlist[1]); + nvkm_memory_unref(&fifo->runlist[0]); return fifo; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h index 8ab53948cbb4..a3994e8db462 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_FIFO_H__ #define __NV50_FIFO_H__ #define nv50_fifo(p) container_of((p), struct nv50_fifo, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h index f889b13b5e41..ae76b1aaccd4 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FIFO_PRIV_H__ #define __NVKM_FIFO_PRIV_H__ #define nvkm_fifo(p) container_of((p), struct nvkm_fifo, engine) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/regsnv04.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/regsnv04.h index 92d56221197b..49892a5e7201 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/regsnv04.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/regsnv04.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV04_FIFO_REGS_H__ #define __NV04_FIFO_REGS_H__ diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index bc77eea351a5..881015080d83 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -986,14 +986,14 @@ gf100_grctx_pack_tpc[] = { ******************************************************************************/ int -gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, u32 access) +gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv) { if (info->data) { info->buffer[info->buffer_nr] = round_up(info->addr, align); info->addr = info->buffer[info->buffer_nr] + size; info->data->size = size; info->data->align = align; - info->data->access = access; + info->data->priv = priv; info->data++; return info->buffer_nr++; } @@ -1028,9 +1028,8 @@ void gf100_grctx_generate_bundle(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); mmio_refn(info, 0x408004, 0x00000000, s, b); mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); @@ -1041,9 +1040,8 @@ void gf100_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -1057,9 +1055,8 @@ gf100_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 attrib = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); int gpc, tpc; u32 bo = 0; @@ -1267,85 +1264,87 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mc_unk260(device, 1); } +#define CB_RESERVED 0x80000 + int gf100_grctx_generate(struct gf100_gr *gr) { const struct gf100_grctx_func *grctx = gr->func->grctx; struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_memory *chan; + struct nvkm_memory *inst = NULL; + struct nvkm_memory *data = NULL; + struct nvkm_vmm *vmm = NULL; + struct nvkm_vma *ctx = NULL; struct gf100_grctx info; int ret, 
i; u64 addr; - /* allocate memory to for a "channel", which we'll use to generate - * the default context values + /* Allocate memory to for a "channel", which we'll use to generate + * the default context values. */ - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x80000 + gr->size, - 0x1000, true, &chan); - if (ret) { - nvkm_error(subdev, "failed to allocate chan memory, %d\n", ret); - return ret; - } + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, + 0x1000, 0x1000, true, &inst); + if (ret) + goto done; - addr = nvkm_memory_addr(chan); + ret = nvkm_vmm_new(device, 0, 0, NULL, 0, NULL, "grctx", &vmm); + if (ret) + goto done; - /* PGD pointer */ - nvkm_kmap(chan); - nvkm_wo32(chan, 0x0200, lower_32_bits(addr + 0x1000)); - nvkm_wo32(chan, 0x0204, upper_32_bits(addr + 0x1000)); - nvkm_wo32(chan, 0x0208, 0xffffffff); - nvkm_wo32(chan, 0x020c, 0x000000ff); + vmm->debug = subdev->debug; - /* PGT[0] pointer */ - nvkm_wo32(chan, 0x1000, 0x00000000); - nvkm_wo32(chan, 0x1004, 0x00000001 | (addr + 0x2000) >> 8); + ret = nvkm_vmm_join(vmm, inst); + if (ret) + goto done; - /* identity-map the whole "channel" into its own vm */ - for (i = 0; i < nvkm_memory_size(chan) / 4096; i++) { - u64 addr = ((nvkm_memory_addr(chan) + (i * 4096)) >> 8) | 1; - nvkm_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr)); - nvkm_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr)); - } + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, + CB_RESERVED + gr->size, 0, true, &data); + if (ret) + goto done; - /* context pointer (virt) */ - nvkm_wo32(chan, 0x0210, 0x00080004); - nvkm_wo32(chan, 0x0214, 0x00000000); - nvkm_done(chan); + ret = nvkm_vmm_get(vmm, 0, nvkm_memory_size(data), &ctx); + if (ret) + goto done; - nvkm_wr32(device, 0x100cb8, (addr + 0x1000) >> 8); - nvkm_wr32(device, 0x100cbc, 0x80000001); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100c80) & 0x00008000) - break; - ); + ret = nvkm_memory_map(data, 0, vmm, ctx, NULL, 0); + if (ret) + goto done; + + + /* Setup context pointer. */ + nvkm_kmap(inst); + nvkm_wo32(inst, 0x0210, lower_32_bits(ctx->addr + CB_RESERVED) | 4); + nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED)); + nvkm_done(inst); - /* setup default state for mmio list construction */ + /* Setup default state for mmio list construction. */ info.gr = gr; info.data = gr->mmio_data; info.mmio = gr->mmio_list; - info.addr = 0x2000 + (i * 8); + info.addr = ctx->addr; info.buffer_nr = 0; - /* make channel current */ + /* Make channel current. 
*/ + addr = nvkm_memory_addr(inst) >> 12; if (gr->firmware) { nvkm_wr32(device, 0x409840, 0x00000030); - nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); + nvkm_wr32(device, 0x409500, 0x80000000 | addr); nvkm_wr32(device, 0x409504, 0x00000003); nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x00000010) break; ); - nvkm_kmap(chan); - nvkm_wo32(chan, 0x8001c, 1); - nvkm_wo32(chan, 0x80020, 0); - nvkm_wo32(chan, 0x80028, 0); - nvkm_wo32(chan, 0x8002c, 0); - nvkm_done(chan); + nvkm_kmap(data); + nvkm_wo32(data, 0x1c, 1); + nvkm_wo32(data, 0x20, 0); + nvkm_wo32(data, 0x28, 0); + nvkm_wo32(data, 0x2c, 0); + nvkm_done(data); } else { nvkm_wr32(device, 0x409840, 0x80000000); - nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); + nvkm_wr32(device, 0x409500, 0x80000000 | addr); nvkm_wr32(device, 0x409504, 0x00000001); nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x80000000) @@ -1355,8 +1354,8 @@ gf100_grctx_generate(struct gf100_gr *gr) grctx->main(gr, &info); - /* trigger a context unload by unsetting the "next channel valid" bit - * and faking a context switch interrupt + /* Trigger a context unload by unsetting the "next channel valid" bit + * and faking a context switch interrupt. */ nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); nvkm_wr32(device, 0x409000, 0x00000100); @@ -1370,17 +1369,21 @@ gf100_grctx_generate(struct gf100_gr *gr) gr->data = kmalloc(gr->size, GFP_KERNEL); if (gr->data) { - nvkm_kmap(chan); + nvkm_kmap(data); for (i = 0; i < gr->size; i += 4) - gr->data[i / 4] = nvkm_ro32(chan, 0x80000 + i); - nvkm_done(chan); + gr->data[i / 4] = nvkm_ro32(data, CB_RESERVED + i); + nvkm_done(data); ret = 0; } else { ret = -ENOMEM; } done: - nvkm_memory_del(&chan); + nvkm_vmm_put(vmm, &ctx); + nvkm_memory_unref(&data); + nvkm_vmm_part(vmm, inst); + nvkm_vmm_unref(&vmm); + nvkm_memory_unref(&inst); return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 017180d147cf..5199e5aa0cb7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GRCTX_NVC0_H__ #define __NVKM_GRCTX_NVC0_H__ #include "gf100.h" @@ -11,7 +12,7 @@ struct gf100_grctx { u64 addr; }; -int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, u32 access); +int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv); void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int); #define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d)) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index 505cdcbfc085..82f71b10c06e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -735,9 +735,8 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info) const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 74a64e3fd59a..19301d88577d 
100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -187,9 +187,8 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info) const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index c46b3fdf7203..825c8fd500bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -847,9 +847,8 @@ gk104_grctx_generate_bundle(struct gf100_grctx *info) const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, grctx->bundle_size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); mmio_refn(info, 0x408004, 0x00000000, s, b); mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); @@ -861,9 +860,8 @@ void gk104_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index 4c4b5ab6e46d..9b43d4ce3eaa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -867,9 +867,8 @@ gm107_grctx_generate_bundle(struct gf100_grctx *info) const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, grctx->bundle_size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); mmio_refn(info, 0x408004, 0x00000000, s, b); mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418e24, 0x00000000, s, b); @@ -881,9 +880,8 @@ void gm107_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -900,9 +898,8 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info) const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = 
NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index 7833bc777a29..88ea322d956c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -33,9 +33,8 @@ void gp100_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -51,9 +50,8 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) const u32 attrib = grctx->attrib_nr; const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 size = roundup(gr->tpc_total * pertpc, 0x80); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size, (1 << s), access); + const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; u32 ao = 0; u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index 80b7ab0bee3a..7a66b4c2eb18 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -38,9 +38,8 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) const u32 attrib = grctx->attrib_nr; const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 size = roundup(gr->tpc_total * pertpc, 0x80); - const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size, (1 << s), access); + const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; u32 ao = 0; u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h index 50e808e9f926..4d67d90261b8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GRCTX_H__ #define __NVKM_GRCTX_H__ #include <core/gpuobj.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h index 12a703fe355d..0323acb739c8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gf100_grgpc_data[] = { /* 0x0000: gpc_mmio_list_head */ 0x00000064, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h index ffbfc51200f1..1bb265917915 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gf117_grgpc_data[] = { /* 0x0000: 
gpc_mmio_list_head */ 0x0000006c, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h index 357f662de571..cf8343a693ba 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gk104_grgpc_data[] = { /* 0x0000: gpc_mmio_list_head */ 0x0000006c, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h index 4ffc8212a85c..f4bfa109ed27 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gk110_grgpc_data[] = { /* 0x0000: gpc_mmio_list_head */ 0x0000006c, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h index 09196206c9bc..59a3e1b2927f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gk208_grgpc_data[] = { /* 0x0000: gpc_mmio_list_head */ 0x0000006c, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h index 6d7d004363d9..8daa0516704a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gm107_grgpc_data[] = { /* 0x0000: gpc_mmio_list_head */ 0x0000006c, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h index 7538404b8b13..cbf2351f8da8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gf100_grhub_data[] = { /* 0x0000: hub_mmio_list_head */ 0x00000300, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h index ce000a47ec6d..70830036ffee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gf117_grhub_data[] = { /* 0x0000: hub_mmio_list_head */ 0x00000300, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h index 1f26cb6a233c..7f2fd84d0c3a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gk104_grhub_data[] = { /* 0x0000: hub_mmio_list_head */ 0x00000300, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h index 70436d93efe3..560063789de8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gk110_grhub_data[] = { /* 0x0000: 
hub_mmio_list_head */ 0x00000300, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h index e0933a07426a..71e85784b615 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gk208_grhub_data[] = { /* 0x0000: hub_mmio_list_head */ 0x00000300, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h index 9b432823bcbe..d85eac6d1c61 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gm107_grhub_data[] = { /* 0x0000: hub_mmio_list_head */ 0x00000300, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/os.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/os.h index 1718ae4e8224..f87693809c9f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/os.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/os.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GRAPH_OS_H__ #define __NVKM_GRAPH_OS_H__ diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 99689f4de502..2f8dc107047d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -37,6 +37,7 @@ #include <nvif/class.h> #include <nvif/cl9097.h> +#include <nvif/if900d.h> #include <nvif/unpack.h> /******************************************************************************* @@ -327,13 +328,13 @@ gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, if (!gr->firmware) { nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2); - nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8); + nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma->addr >> 8); } else { nvkm_wo32(*pgpuobj, 0xf4, 0); nvkm_wo32(*pgpuobj, 0xf8, 0); nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2); - nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset)); - nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset)); + nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma->addr)); + nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma->addr)); nvkm_wo32(*pgpuobj, 0x1c, 1); nvkm_wo32(*pgpuobj, 0x20, 0); nvkm_wo32(*pgpuobj, 0x28, 0); @@ -350,18 +351,13 @@ gf100_gr_chan_dtor(struct nvkm_object *object) int i; for (i = 0; i < ARRAY_SIZE(chan->data); i++) { - if (chan->data[i].vma.node) { - nvkm_vm_unmap(&chan->data[i].vma); - nvkm_vm_put(&chan->data[i].vma); - } - nvkm_memory_del(&chan->data[i].mem); + nvkm_vmm_put(chan->vmm, &chan->data[i].vma); + nvkm_memory_unref(&chan->data[i].mem); } - if (chan->mmio_vma.node) { - nvkm_vm_unmap(&chan->mmio_vma); - nvkm_vm_put(&chan->mmio_vma); - } - nvkm_memory_del(&chan->mmio); + nvkm_vmm_put(chan->vmm, &chan->mmio_vma); + nvkm_memory_unref(&chan->mmio); + nvkm_vmm_unref(&chan->vmm); return chan; } @@ -380,6 +376,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, struct gf100_gr_data *data = gr->mmio_data; struct gf100_gr_mmio *mmio = gr->mmio_list; struct gf100_gr_chan *chan; + struct gf100_vmm_map_v0 args = { .priv = 1 }; struct nvkm_device *device = gr->base.engine.subdev.device; int ret, i; @@ -387,6 +384,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; 
nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object); chan->gr = gr; + chan->vmm = nvkm_vmm_ref(fifoch->vmm); *pobject = &chan->object; /* allocate memory for a "mmio list" buffer that's used by the HUB @@ -398,12 +396,14 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, if (ret) return ret; - ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW | - NV_MEM_ACCESS_SYS, &chan->mmio_vma); + ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma); if (ret) return ret; - nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0); + ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm, + chan->mmio_vma, &args, sizeof(args)); + if (ret) + return ret; /* allocate buffers referenced by mmio list */ for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) { @@ -413,13 +413,19 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, if (ret) return ret; - ret = nvkm_vm_get(fifoch->vm, - nvkm_memory_size(chan->data[i].mem), 12, - data->access, &chan->data[i].vma); + ret = nvkm_vmm_get(fifoch->vmm, 12, + nvkm_memory_size(chan->data[i].mem), + &chan->data[i].vma); + if (ret) + return ret; + + args.priv = data->priv; + + ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm, + chan->data[i].vma, &args, sizeof(args)); if (ret) return ret; - nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0); data++; } @@ -430,7 +436,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, u32 data = mmio->data; if (mmio->buffer >= 0) { - u64 info = chan->data[mmio->buffer].vma.offset; + u64 info = chan->data[mmio->buffer].vma->addr; data |= info >> mmio->shift; } @@ -1855,8 +1861,12 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname, int ret; ret = nvkm_firmware_get(device, fwname, &fw); - if (ret) - return gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret); + if (ret) { + ret = gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret); + if (ret) + return -ENODEV; + return 0; + } fuc->size = fw->size; fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL); @@ -1903,25 +1913,33 @@ gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, return 0; } +void +gf100_gr_init_gpc_mmu(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_fb *fb = device->fb; + + nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0x00000001); + nvkm_wr32(device, 0x4188a4, 0x00000000); + nvkm_wr32(device, 0x418888, 0x00000000); + nvkm_wr32(device, 0x41888c, 0x00000000); + nvkm_wr32(device, 0x418890, 0x00000000); + nvkm_wr32(device, 0x418894, 0x00000000); + nvkm_wr32(device, 0x4188b4, nvkm_memory_addr(fb->mmu_wr) >> 8); + nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(fb->mmu_rd) >> 8); +} + int gf100_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - struct nvkm_fb *fb = device->fb; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); u32 data[TPC_MAX / 8] = {}; u8 tpcnr[GPC_MAX]; int gpc, tpc, rop; int i; - nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8); - nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8); + gr->func->init_gpc_mmu(gr); gf100_gr_mmio(gr, gr->func->mmio); @@ -2036,6 +2054,7 @@ gf100_gr_gpccs_ucode = 
{ static const struct gf100_gr_func gf100_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf100_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index a36e45a4a635..d7c2adb9b543 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -45,7 +45,7 @@ struct gf100_gr_data { u32 size; u32 align; - u32 access; + bool priv; }; struct gf100_gr_mmio { @@ -156,18 +156,20 @@ int gp100_gr_init(struct gf100_gr *); void gp100_gr_init_rop_active_fbps(struct gf100_gr *); #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) +#include <core/object.h> struct gf100_gr_chan { struct nvkm_object object; struct gf100_gr *gr; + struct nvkm_vmm *vmm; struct nvkm_memory *mmio; - struct nvkm_vma mmio_vma; + struct nvkm_vma *mmio_vma; int mmio_nr; struct { struct nvkm_memory *mem; - struct nvkm_vma vma; + struct nvkm_vma *vma; } data[4]; }; @@ -253,6 +255,7 @@ extern const struct gf100_gr_init gf100_gr_init_mpc_0[]; extern const struct gf100_gr_init gf100_gr_init_be_0[]; extern const struct gf100_gr_init gf100_gr_init_fe_1[]; extern const struct gf100_gr_init gf100_gr_init_pe_1[]; +void gf100_gr_init_gpc_mmu(struct gf100_gr *); extern const struct gf100_gr_init gf104_gr_init_ds_0[]; extern const struct gf100_gr_init gf104_gr_init_tex_0[]; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c index d736dcd55ea2..ec0f11983b23 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c @@ -115,6 +115,7 @@ gf104_gr_pack_mmio[] = { static const struct gf100_gr_func gf104_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf104_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c index 2f0d24498427..cc152eb74123 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c @@ -106,6 +106,7 @@ gf108_gr_pack_mmio[] = { static const struct gf100_gr_func gf108_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf108_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c index d1d942eb86af..10d2d73ca8c3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c @@ -87,6 +87,7 @@ gf110_gr_pack_mmio[] = { static const struct gf100_gr_func gf110_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf110_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c index 0124e468086e..ac09a07c4150 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c @@ -123,6 +123,7 @@ gf117_gr_gpccs_ucode = { static const struct gf100_gr_func gf117_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf117_gr_pack_mmio, .fecs.ucode = &gf117_gr_fecs_ucode, .gpccs.ucode = &gf117_gr_gpccs_ucode, diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c index 8d8e4cafe28f..7f449ec6f760 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c @@ -178,6 +178,7 @@ gf119_gr_pack_mmio[] = { static const struct gf100_gr_func gf119_gr = { .init = gf100_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .mmio = gf119_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c index ec22da6c99fc..5e82f94c2245 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c @@ -24,8 +24,6 @@ #include "gf100.h" #include "ctxgf100.h" -#include <subdev/fb.h> - #include <nvif/class.h> /******************************************************************************* @@ -207,21 +205,13 @@ int gk104_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - struct nvkm_fb *fb = device->fb; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); u32 data[TPC_MAX / 8] = {}; u8 tpcnr[GPC_MAX]; int gpc, tpc, rop; int i; - nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); - nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8); - nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8); + gr->func->init_gpc_mmu(gr); gf100_gr_mmio(gr, gr->func->mmio); @@ -339,6 +329,7 @@ gk104_gr_gpccs_ucode = { static const struct gf100_gr_func gk104_gr = { .init = gk104_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk104_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c index f31b171a4102..a38e19b61c1d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c @@ -183,6 +183,7 @@ gk110_gr_gpccs_ucode = { static const struct gf100_gr_func gk110_gr = { .init = gk104_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk110_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c index d76dd178007f..1912c0bfd7ee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c @@ -103,6 +103,7 @@ gk110b_gr_pack_mmio[] = { static const struct gf100_gr_func gk110b_gr = { .init = gk104_gr_init, + .init_gpc_mmu = gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk110b_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c index 14bbe6ed02a9..1fc258163f25 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c @@ -162,6 +162,7 @@ gk208_gr_gpccs_ucode = { static const struct gf100_gr_func gk208_gr = { .init = gk104_gr_init, + .init_gpc_mmu = 
gf100_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .mmio = gk208_gr_pack_mmio, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h index d7c3d86cc99d..d5a376c4dd0b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV10_GR_H__ #define __NV10_GR_H__ #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c index d1dc92999dc0..111c8bb4497b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "nv20.h" #include "regs.h" @@ -59,7 +60,7 @@ void * nv20_gr_chan_dtor(struct nvkm_object *object) { struct nv20_gr_chan *chan = nv20_gr_chan(object); - nvkm_memory_del(&chan->inst); + nvkm_memory_unref(&chan->inst); return chan; } @@ -323,7 +324,7 @@ void * nv20_gr_dtor(struct nvkm_gr *base) { struct nv20_gr *gr = nv20_gr(base); - nvkm_memory_del(&gr->ctxtab); + nvkm_memory_unref(&gr->ctxtab); return gr; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h index cdf4501e3798..979dc5f7b32e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV20_GR_H__ #define __NV20_GR_H__ #define nv20_gr(p) container_of((p), struct nv20_gr, base) @@ -19,6 +20,7 @@ void nv20_gr_tile(struct nvkm_gr *, int, struct nvkm_fb_tile *); int nv30_gr_init(struct nvkm_gr *); #define nv20_gr_chan(p) container_of((p), struct nv20_gr_chan, object) +#include <core/object.h> struct nv20_gr_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c index 6c4a00819b4b..e59a28a26d65 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "nv20.h" #include "regs.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c index 3cad26dbc2b1..e113b2d4c811 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "nv20.h" #include "regs.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c index b4e3c50badc7..4aac2c224874 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "nv20.h" #include "regs.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c index e7ed04b935cd..301556503e93 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "nv20.h" #include "regs.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c index 5e8abacbacc6..5d6926611a5b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c @@ -1,3 +1,4 @@ 
+// SPDX-License-Identifier: GPL-2.0 #include "nv20.h" #include "regs.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h index 2812ed11f877..731400937edd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV40_GR_H__ #define __NV40_GR_H__ #define nv40_gr(p) container_of((p), struct nv40_gr, base) @@ -16,6 +17,7 @@ void nv40_gr_intr(struct nvkm_gr *); u64 nv40_gr_units(struct nvkm_gr *); #define nv40_gr_chan(p) container_of((p), struct nv40_gr_chan, object) +#include <core/object.h> struct nv40_gr_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h index 45eec83a5969..5b9d99bee207 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_GR_H__ #define __NV50_GR_H__ #define nv50_gr(p) container_of((p), struct nv50_gr, base) @@ -19,6 +20,7 @@ u64 nv50_gr_units(struct nvkm_gr *); int g84_gr_tlb_flush(struct nvkm_gr *); #define nv50_gr_chan(p) container_of((p), struct nv50_gr_chan, object) +#include <core/object.h> struct nv50_gr_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h index 2a52d9f026ec..66359c23cbce 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GR_PRIV_H__ #define __NVKM_GR_PRIV_H__ #define nvkm_gr(p) container_of((p), struct nvkm_gr, engine) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/regs.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/regs.h index 90a9873ce522..dc4f936675ac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/regs.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/regs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GR_REGS_H__ #define __NVKM_GR_REGS_H__ diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h index d3bb34fcdebf..b31fad8bdaad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV31_MPEG_H__ #define __NV31_MPEG_H__ #define nv31_mpeg(p) container_of((p), struct nv31_mpeg, engine) @@ -18,6 +19,7 @@ struct nv31_mpeg_func { }; #define nv31_mpeg_chan(p) container_of((p), struct nv31_mpeg_chan, object) +#include <core/object.h> struct nv31_mpeg_chan { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c index 4e528851e9c0..6df880a39019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c @@ -24,6 +24,7 @@ #include "priv.h" #include <core/gpuobj.h> +#include <core/object.h> #include <subdev/timer.h> #include <nvif/class.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h index d5753103ff63..26f9d14151e2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MPEG_PRIV_H__ #define __NVKM_MPEG_PRIV_H__ 
#include <engine/mpeg.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mspdec/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/mspdec/priv.h index d518af4bc9de..db305072a82f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mspdec/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mspdec/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MSPDEC_PRIV_H__ #define __NVKM_MSPDEC_PRIV_H__ #include <engine/mspdec.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/msppp/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/msppp/priv.h index 37a91f9d9181..7708e52c9043 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/msppp/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/msppp/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MSPPP_PRIV_H__ #define __NVKM_MSPPP_PRIV_H__ #include <engine/msppp.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/msvld/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/msvld/priv.h index 9dc1da67d929..66c36049abca 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/msvld/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/msvld/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MSVLD_PRIV_H__ #define __NVKM_MSVLD_PRIV_H__ #include <engine/msvld.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h index 353b94f51205..6c300739f621 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_NVDEC_PRIV_H__ #define __NVKM_NVDEC_PRIV_H__ #include <engine/nvdec.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.h index 56d0344853ea..c74fd4557d41 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PM_NVC0_H__ #define __NVKM_PM_NVC0_H__ #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.h b/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.h index da481abe8f7a..3f37b713936c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PM_NV40_H__ #define __NVKM_PM_NV40_H__ #define nv40_pm(p) container_of((p), struct nv40_pm, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h index d7b81cbf82b5..9fad3611a843 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PM_PRIV_H__ #define __NVKM_PM_PRIV_H__ #define nvkm_pm(p) container_of((p), struct nvkm_pm, engine) @@ -67,6 +68,7 @@ struct nvkm_specdom { }; #define nvkm_perfdom(p) container_of((p), struct nvkm_perfdom, object) +#include <core/object.h> struct nvkm_perfdom { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h index 4b57f8814560..6278a0c5fe83 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t g98_sec_data[] = { /* 0x0000: ctx_dma */ /* 0x0000: ctx_dma_query */ diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h index 7ecc9d4724dc..2f97c806a79d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SEC2_PRIV_H__ #define __NVKM_SEC2_PRIV_H__ #include <engine/sec2.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h index 6608bf6c6842..d42862fc43fd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h @@ -1,9 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SW_CHAN_H__ #define __NVKM_SW_CHAN_H__ #define nvkm_sw_chan(p) container_of((p), struct nvkm_sw_chan, object) -#include "priv.h" +#include <core/object.h> #include <core/event.h> +#include "priv.h" + struct nvkm_sw_chan { const struct nvkm_sw_chan_func *func; struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h index 25cdfdef2d46..459afd30a484 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SW_NV50_H__ #define __NVKM_SW_NV50_H__ #define nv50_sw_chan(p) container_of((p), struct nv50_sw_chan, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h index 943ef4c10091..d7034950ba87 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h @@ -1,7 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_NVSW_H__ #define __NVKM_NVSW_H__ #define nvkm_nvsw(p) container_of((p), struct nvkm_nvsw, object) -#include "priv.h" +#include <core/object.h> struct nvkm_nvsw { struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h index 0ef1318dc2fd..4aca1791abc3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_SW_PRIV_H__ #define __NVKM_SW_PRIV_H__ #define nvkm_sw(p) container_of((p), struct nvkm_sw, engine) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c index 06bdb67a0205..70549381e082 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c @@ -86,7 +86,7 @@ nvkm_xtensa_fini(struct nvkm_engine *engine, bool suspend) nvkm_wr32(device, base + 0xd94, 0); /* FIFO_CTRL */ if (!suspend) - nvkm_memory_del(&xtensa->gpu_fw); + nvkm_memory_unref(&xtensa->gpu_fw); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index 1b7f48efd8b1..14be41f24155 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -60,7 +60,7 @@ nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port, } void -nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *inst) +nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_memory *inst) { if (!falcon->func->bind_context) { nvkm_error(falcon->user, diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h index 97b56f759d0b..d515ad994199 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FALCON_PRIV_H__ #define __NVKM_FALCON_PRIV_H__ #include <engine/falcon.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index 669c24028470..9def926f24d4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -180,7 +180,7 @@ nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, } static void -nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) +nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_memory *ctx) { u32 inst_loc; u32 fbif; @@ -216,7 +216,7 @@ nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6); /* Set context */ - switch (nvkm_memory_target(ctx->memory)) { + switch (nvkm_memory_target(ctx)) { case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break; case NVKM_MEM_TARGET_HOST: inst_loc = 2; break; case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break; @@ -228,7 +228,7 @@ nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) /* Enable context */ nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1); nvkm_falcon_wr32(falcon, 0x054, - ((ctx->addr >> 12) & 0xfffffff) | + ((nvkm_memory_addr(ctx) >> 12) & 0xfffffff) | (inst_loc << 28) | (1 << 30)); nvkm_falcon_mask(falcon, 0x090, 0x10000, 0x10000); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild index 1e138b337955..e5830453813d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild @@ -3,3 +3,5 @@ nvkm-y += nvkm/subdev/bar/nv50.o nvkm-y += nvkm/subdev/bar/g84.o nvkm-y += nvkm/subdev/bar/gf100.o nvkm-y += nvkm/subdev/bar/gk20a.o +nvkm-y += nvkm/subdev/bar/gm107.o +nvkm-y += nvkm/subdev/bar/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c index c561d148cebc..9646adec57cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c @@ -30,39 +30,76 @@ nvkm_bar_flush(struct nvkm_bar *bar) bar->func->flush(bar); } -struct nvkm_vm * -nvkm_bar_kmap(struct nvkm_bar *bar) +struct nvkm_vmm * +nvkm_bar_bar1_vmm(struct nvkm_device *device) { - /* disallow kmap() until after vm has been bootstrapped */ - if (bar && bar->func->kmap && bar->subdev.oneinit) - return bar->func->kmap(bar); + return device->bar->func->bar1.vmm(device->bar); +} + +struct nvkm_vmm * +nvkm_bar_bar2_vmm(struct nvkm_device *device) +{ + /* Denies access to BAR2 when it's not initialised, used by INSTMEM + * to know when object access needs to go through the BAR0 window. 
+ */ + struct nvkm_bar *bar = device->bar; + if (bar && bar->bar2) + return bar->func->bar2.vmm(bar); return NULL; } -int -nvkm_bar_umap(struct nvkm_bar *bar, u64 size, int type, struct nvkm_vma *vma) +void +nvkm_bar_bar2_fini(struct nvkm_device *device) { - return bar->func->umap(bar, size, type, vma); + struct nvkm_bar *bar = device->bar; + if (bar && bar->bar2) { + bar->func->bar2.fini(bar); + bar->bar2 = false; + } +} + +void +nvkm_bar_bar2_init(struct nvkm_device *device) +{ + struct nvkm_bar *bar = device->bar; + if (bar && bar->subdev.oneinit && !bar->bar2 && bar->func->bar2.init) { + bar->func->bar2.init(bar); + bar->func->bar2.wait(bar); + bar->bar2 = true; + } } static int -nvkm_bar_oneinit(struct nvkm_subdev *subdev) +nvkm_bar_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_bar *bar = nvkm_bar(subdev); - return bar->func->oneinit(bar); + bar->func->bar1.fini(bar); + return 0; } static int nvkm_bar_init(struct nvkm_subdev *subdev) { struct nvkm_bar *bar = nvkm_bar(subdev); - return bar->func->init(bar); + bar->func->bar1.init(bar); + bar->func->bar1.wait(bar); + if (bar->func->init) + bar->func->init(bar); + return 0; +} + +static int +nvkm_bar_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_bar *bar = nvkm_bar(subdev); + return bar->func->oneinit(bar); } static void * nvkm_bar_dtor(struct nvkm_subdev *subdev) { struct nvkm_bar *bar = nvkm_bar(subdev); + nvkm_bar_bar2_fini(subdev->device); return bar->func->dtor(bar); } @@ -71,6 +108,7 @@ nvkm_bar = { .dtor = nvkm_bar_dtor, .oneinit = nvkm_bar_oneinit, .init = nvkm_bar_init, + .fini = nvkm_bar_fini, }; void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c index ef717136c838..87f26f54b481 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c @@ -44,8 +44,14 @@ g84_bar_func = { .dtor = nv50_bar_dtor, .oneinit = nv50_bar_oneinit, .init = nv50_bar_init, - .kmap = nv50_bar_kmap, - .umap = nv50_bar_umap, + .bar1.init = nv50_bar_bar1_init, + .bar1.fini = nv50_bar_bar1_fini, + .bar1.wait = nv50_bar_bar1_wait, + .bar1.vmm = nv50_bar_bar1_vmm, + .bar2.init = nv50_bar_bar2_init, + .bar2.fini = nv50_bar_bar2_fini, + .bar2.wait = nv50_bar_bar1_wait, + .bar2.vmm = nv50_bar_bar2_vmm, .flush = g84_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c index 676c167c95b9..a3ba7f50198b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c @@ -23,39 +23,73 @@ */ #include "gf100.h" -#include <core/gpuobj.h> +#include <core/memory.h> #include <core/option.h> #include <subdev/fb.h> #include <subdev/mmu.h> -static struct nvkm_vm * -gf100_bar_kmap(struct nvkm_bar *base) +struct nvkm_vmm * +gf100_bar_bar1_vmm(struct nvkm_bar *base) { - return gf100_bar(base)->bar[0].vm; + return gf100_bar(base)->bar[1].vmm; } -int -gf100_bar_umap(struct nvkm_bar *base, u64 size, int type, struct nvkm_vma *vma) +void +gf100_bar_bar1_wait(struct nvkm_bar *base) +{ + /* NFI why it's twice. 
*/ + nvkm_bar_flush(base); + nvkm_bar_flush(base); +} + +void +gf100_bar_bar1_fini(struct nvkm_bar *bar) { + nvkm_mask(bar->subdev.device, 0x001704, 0x80000000, 0x00000000); +} + +void +gf100_bar_bar1_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; struct gf100_bar *bar = gf100_bar(base); - return nvkm_vm_get(bar->bar[1].vm, size, type, NV_MEM_ACCESS_RW, vma); + const u32 addr = nvkm_memory_addr(bar->bar[1].inst) >> 12; + nvkm_wr32(device, 0x001704, 0x80000000 | addr); +} + +struct nvkm_vmm * +gf100_bar_bar2_vmm(struct nvkm_bar *base) +{ + return gf100_bar(base)->bar[0].vmm; +} + +void +gf100_bar_bar2_fini(struct nvkm_bar *bar) +{ + nvkm_mask(bar->subdev.device, 0x001714, 0x80000000, 0x00000000); +} + +void +gf100_bar_bar2_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; + struct gf100_bar *bar = gf100_bar(base); + u32 addr = nvkm_memory_addr(bar->bar[0].inst) >> 12; + if (bar->bar2_halve) + addr |= 0x40000000; + nvkm_wr32(device, 0x001714, 0x80000000 | addr); } static int -gf100_bar_ctor_vm(struct gf100_bar *bar, struct gf100_bar_vm *bar_vm, - struct lock_class_key *key, int bar_nr) +gf100_bar_oneinit_bar(struct gf100_bar *bar, struct gf100_barN *bar_vm, + struct lock_class_key *key, int bar_nr) { struct nvkm_device *device = bar->base.subdev.device; - struct nvkm_vm *vm; resource_size_t bar_len; int ret; ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0, false, - &bar_vm->mem); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x8000, 0, false, NULL, &bar_vm->pgd); + &bar_vm->inst); if (ret) return ret; @@ -63,98 +97,64 @@ gf100_bar_ctor_vm(struct gf100_bar *bar, struct gf100_bar_vm *bar_vm, if (bar_nr == 3 && bar->bar2_halve) bar_len >>= 1; - ret = nvkm_vm_new(device, 0, bar_len, 0, key, &vm); + ret = nvkm_vmm_new(device, 0, bar_len, NULL, 0, key, + (bar_nr == 3) ? "bar2" : "bar1", &bar_vm->vmm); if (ret) return ret; - atomic_inc(&vm->engref[NVKM_SUBDEV_BAR]); + atomic_inc(&bar_vm->vmm->engref[NVKM_SUBDEV_BAR]); + bar_vm->vmm->debug = bar->base.subdev.debug; /* * Bootstrap page table lookup. 
*/ if (bar_nr == 3) { - ret = nvkm_vm_boot(vm, bar_len); - if (ret) { - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_boot(bar_vm->vmm); + if (ret) return ret; - } } - ret = nvkm_vm_ref(vm, &bar_vm->vm, bar_vm->pgd); - nvkm_vm_ref(NULL, &vm, NULL); - if (ret) - return ret; - - nvkm_kmap(bar_vm->mem); - nvkm_wo32(bar_vm->mem, 0x0200, lower_32_bits(bar_vm->pgd->addr)); - nvkm_wo32(bar_vm->mem, 0x0204, upper_32_bits(bar_vm->pgd->addr)); - nvkm_wo32(bar_vm->mem, 0x0208, lower_32_bits(bar_len - 1)); - nvkm_wo32(bar_vm->mem, 0x020c, upper_32_bits(bar_len - 1)); - nvkm_done(bar_vm->mem); - return 0; + return nvkm_vmm_join(bar_vm->vmm, bar_vm->inst); } int gf100_bar_oneinit(struct nvkm_bar *base) { static struct lock_class_key bar1_lock; - static struct lock_class_key bar3_lock; + static struct lock_class_key bar2_lock; struct gf100_bar *bar = gf100_bar(base); int ret; - /* BAR3 */ - if (bar->base.func->kmap) { - ret = gf100_bar_ctor_vm(bar, &bar->bar[0], &bar3_lock, 3); + /* BAR2 */ + if (bar->base.func->bar2.init) { + ret = gf100_bar_oneinit_bar(bar, &bar->bar[0], &bar2_lock, 3); if (ret) return ret; + + bar->base.subdev.oneinit = true; + nvkm_bar_bar2_init(bar->base.subdev.device); } /* BAR1 */ - ret = gf100_bar_ctor_vm(bar, &bar->bar[1], &bar1_lock, 1); + ret = gf100_bar_oneinit_bar(bar, &bar->bar[1], &bar1_lock, 1); if (ret) return ret; return 0; } -int -gf100_bar_init(struct nvkm_bar *base) -{ - struct gf100_bar *bar = gf100_bar(base); - struct nvkm_device *device = bar->base.subdev.device; - u32 addr; - - nvkm_mask(device, 0x000200, 0x00000100, 0x00000000); - nvkm_mask(device, 0x000200, 0x00000100, 0x00000100); - - addr = nvkm_memory_addr(bar->bar[1].mem) >> 12; - nvkm_wr32(device, 0x001704, 0x80000000 | addr); - - if (bar->bar[0].mem) { - addr = nvkm_memory_addr(bar->bar[0].mem) >> 12; - if (bar->bar2_halve) - addr |= 0x40000000; - nvkm_wr32(device, 0x001714, 0x80000000 | addr); - } - - return 0; -} - void * gf100_bar_dtor(struct nvkm_bar *base) { struct gf100_bar *bar = gf100_bar(base); - nvkm_vm_ref(NULL, &bar->bar[1].vm, bar->bar[1].pgd); - nvkm_gpuobj_del(&bar->bar[1].pgd); - nvkm_memory_del(&bar->bar[1].mem); + nvkm_vmm_part(bar->bar[1].vmm, bar->bar[1].inst); + nvkm_vmm_unref(&bar->bar[1].vmm); + nvkm_memory_unref(&bar->bar[1].inst); - if (bar->bar[0].vm) { - nvkm_memory_del(&bar->bar[0].vm->pgt[0].mem[0]); - nvkm_vm_ref(NULL, &bar->bar[0].vm, bar->bar[0].pgd); - } - nvkm_gpuobj_del(&bar->bar[0].pgd); - nvkm_memory_del(&bar->bar[0].mem); + nvkm_vmm_part(bar->bar[0].vmm, bar->bar[0].inst); + nvkm_vmm_unref(&bar->bar[0].vmm); + nvkm_memory_unref(&bar->bar[0].inst); return bar; } @@ -175,9 +175,14 @@ static const struct nvkm_bar_func gf100_bar_func = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, - .init = gf100_bar_init, - .kmap = gf100_bar_kmap, - .umap = gf100_bar_umap, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gf100_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, + .bar2.init = gf100_bar_bar2_init, + .bar2.fini = gf100_bar_bar2_fini, + .bar2.wait = gf100_bar_bar1_wait, + .bar2.vmm = gf100_bar_bar2_vmm, .flush = g84_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h index 20a5255362ba..4f2b66e8d795 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h @@ -1,24 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __GF100_BAR_H__ #define __GF100_BAR_H__ #define gf100_bar(p) container_of((p), 
struct gf100_bar, base) #include "priv.h" -struct gf100_bar_vm { - struct nvkm_memory *mem; - struct nvkm_gpuobj *pgd; - struct nvkm_vm *vm; +struct gf100_barN { + struct nvkm_memory *inst; + struct nvkm_vmm *vmm; }; struct gf100_bar { struct nvkm_bar base; bool bar2_halve; - struct gf100_bar_vm bar[2]; + struct gf100_barN bar[2]; }; int gf100_bar_new_(const struct nvkm_bar_func *, struct nvkm_device *, int, struct nvkm_bar **); void *gf100_bar_dtor(struct nvkm_bar *); int gf100_bar_oneinit(struct nvkm_bar *); -int gf100_bar_init(struct nvkm_bar *); -int gf100_bar_umap(struct nvkm_bar *, u64, int, struct nvkm_vma *); +void gf100_bar_bar1_init(struct nvkm_bar *); +void gf100_bar_bar1_wait(struct nvkm_bar *); +struct nvkm_vmm *gf100_bar_bar1_vmm(struct nvkm_bar *); +void gf100_bar_bar2_init(struct nvkm_bar *); +struct nvkm_vmm *gf100_bar_bar2_vmm(struct nvkm_bar *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c index 9232fab4274c..b10077d38839 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c @@ -25,8 +25,10 @@ static const struct nvkm_bar_func gk20a_bar_func = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, - .init = gf100_bar_init, - .umap = gf100_bar_umap, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gf100_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, .flush = g84_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm107.c new file mode 100644 index 000000000000..3ddf9222d935 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm107.c @@ -0,0 +1,65 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "gf100.h" + +#include <subdev/timer.h> + +void +gm107_bar_bar1_wait(struct nvkm_bar *bar) +{ + struct nvkm_device *device = bar->subdev.device; + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x001710) & 0x00000003)) + break; + ); +} + +static void +gm107_bar_bar2_wait(struct nvkm_bar *bar) +{ + struct nvkm_device *device = bar->subdev.device; + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x001710) & 0x0000000c)) + break; + ); +} + +static const struct nvkm_bar_func +gm107_bar_func = { + .dtor = gf100_bar_dtor, + .oneinit = gf100_bar_oneinit, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gm107_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, + .bar2.init = gf100_bar_bar2_init, + .bar2.fini = gf100_bar_bar2_fini, + .bar2.wait = gm107_bar_bar2_wait, + .bar2.vmm = gf100_bar_bar2_vmm, + .flush = g84_bar_flush, +}; + +int +gm107_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) +{ + return gf100_bar_new_(&gm107_bar_func, device, index, pbar); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm20b.c new file mode 100644 index 000000000000..950bff1955ad --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm20b.c @@ -0,0 +1,42 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "gf100.h" + +static const struct nvkm_bar_func +gm20b_bar_func = { + .dtor = gf100_bar_dtor, + .oneinit = gf100_bar_oneinit, + .bar1.init = gf100_bar_bar1_init, + .bar1.fini = gf100_bar_bar1_fini, + .bar1.wait = gm107_bar_bar1_wait, + .bar1.vmm = gf100_bar_bar1_vmm, + .flush = g84_bar_flush, +}; + +int +gm20b_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) +{ + int ret = gf100_bar_new_(&gm20b_bar_func, device, index, pbar); + if (ret == 0) + (*pbar)->iomap_uncached = true; + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c index 6eff637ac301..157b076a1272 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c @@ -28,19 +28,6 @@ #include <subdev/mmu.h> #include <subdev/timer.h> -struct nvkm_vm * -nv50_bar_kmap(struct nvkm_bar *base) -{ - return nv50_bar(base)->bar3_vm; -} - -int -nv50_bar_umap(struct nvkm_bar *base, u64 size, int type, struct nvkm_vma *vma) -{ - struct nv50_bar *bar = nv50_bar(base); - return nvkm_vm_get(bar->bar1_vm, size, type, NV_MEM_ACCESS_RW, vma); -} - static void nv50_bar_flush(struct nvkm_bar *base) { @@ -56,14 +43,72 @@ nv50_bar_flush(struct nvkm_bar *base) spin_unlock_irqrestore(&bar->base.lock, flags); } +struct nvkm_vmm * +nv50_bar_bar1_vmm(struct nvkm_bar *base) +{ + return nv50_bar(base)->bar1_vmm; +} + +void +nv50_bar_bar1_wait(struct nvkm_bar *base) +{ + nvkm_bar_flush(base); +} + +void +nv50_bar_bar1_fini(struct nvkm_bar *bar) +{ + nvkm_wr32(bar->subdev.device, 0x001708, 0x00000000); +} + +void +nv50_bar_bar1_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; + struct nv50_bar *bar = nv50_bar(base); + nvkm_wr32(device, 0x001708, 0x80000000 | bar->bar1->node->offset >> 4); +} + +struct nvkm_vmm * +nv50_bar_bar2_vmm(struct nvkm_bar *base) +{ + return nv50_bar(base)->bar2_vmm; +} + +void +nv50_bar_bar2_fini(struct nvkm_bar *bar) +{ + nvkm_wr32(bar->subdev.device, 0x00170c, 0x00000000); +} + +void +nv50_bar_bar2_init(struct nvkm_bar *base) +{ + struct nvkm_device *device = base->subdev.device; + struct nv50_bar *bar = nv50_bar(base); + nvkm_wr32(device, 0x001704, 0x00000000 | bar->mem->addr >> 12); + nvkm_wr32(device, 0x001704, 0x40000000 | bar->mem->addr >> 12); + nvkm_wr32(device, 0x00170c, 0x80000000 | bar->bar2->node->offset >> 4); +} + +void +nv50_bar_init(struct nvkm_bar *base) +{ + struct nv50_bar *bar = nv50_bar(base); + struct nvkm_device *device = bar->base.subdev.device; + int i; + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x001900 + (i * 4), 0x00000000); +} + int nv50_bar_oneinit(struct nvkm_bar *base) { struct nv50_bar *bar = nv50_bar(base); struct nvkm_device *device = bar->base.subdev.device; static struct lock_class_key bar1_lock; - static struct lock_class_key bar3_lock; - struct nvkm_vm *vm; + static struct lock_class_key bar2_lock; u64 start, limit; int ret; @@ -80,51 +125,54 @@ nv50_bar_oneinit(struct nvkm_bar *base) if (ret) return ret; - /* BAR3 */ + /* BAR2 */ start = 0x0100000000ULL; limit = start + device->func->resource_size(device, 3); - ret = nvkm_vm_new(device, start, limit - start, start, &bar3_lock, &vm); + ret = nvkm_vmm_new(device, start, limit-- - start, NULL, 0, + &bar2_lock, "bar2", &bar->bar2_vmm); if (ret) return ret; - atomic_inc(&vm->engref[NVKM_SUBDEV_BAR]); + atomic_inc(&bar->bar2_vmm->engref[NVKM_SUBDEV_BAR]); + bar->bar2_vmm->debug = bar->base.subdev.debug; - ret = nvkm_vm_boot(vm, limit-- - start); + ret = 
nvkm_vmm_boot(bar->bar2_vmm); if (ret) return ret; - ret = nvkm_vm_ref(vm, &bar->bar3_vm, bar->pgd); - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_join(bar->bar2_vmm, bar->mem->memory); if (ret) return ret; - ret = nvkm_gpuobj_new(device, 24, 16, false, bar->mem, &bar->bar3); + ret = nvkm_gpuobj_new(device, 24, 16, false, bar->mem, &bar->bar2); if (ret) return ret; - nvkm_kmap(bar->bar3); - nvkm_wo32(bar->bar3, 0x00, 0x7fc00000); - nvkm_wo32(bar->bar3, 0x04, lower_32_bits(limit)); - nvkm_wo32(bar->bar3, 0x08, lower_32_bits(start)); - nvkm_wo32(bar->bar3, 0x0c, upper_32_bits(limit) << 24 | + nvkm_kmap(bar->bar2); + nvkm_wo32(bar->bar2, 0x00, 0x7fc00000); + nvkm_wo32(bar->bar2, 0x04, lower_32_bits(limit)); + nvkm_wo32(bar->bar2, 0x08, lower_32_bits(start)); + nvkm_wo32(bar->bar2, 0x0c, upper_32_bits(limit) << 24 | upper_32_bits(start)); - nvkm_wo32(bar->bar3, 0x10, 0x00000000); - nvkm_wo32(bar->bar3, 0x14, 0x00000000); - nvkm_done(bar->bar3); + nvkm_wo32(bar->bar2, 0x10, 0x00000000); + nvkm_wo32(bar->bar2, 0x14, 0x00000000); + nvkm_done(bar->bar2); + + bar->base.subdev.oneinit = true; + nvkm_bar_bar2_init(device); /* BAR1 */ start = 0x0000000000ULL; limit = start + device->func->resource_size(device, 1); - ret = nvkm_vm_new(device, start, limit-- - start, start, &bar1_lock, &vm); - if (ret) - return ret; + ret = nvkm_vmm_new(device, start, limit-- - start, NULL, 0, + &bar1_lock, "bar1", &bar->bar1_vmm); - atomic_inc(&vm->engref[NVKM_SUBDEV_BAR]); + atomic_inc(&bar->bar1_vmm->engref[NVKM_SUBDEV_BAR]); + bar->bar1_vmm->debug = bar->base.subdev.debug; - ret = nvkm_vm_ref(vm, &bar->bar1_vm, bar->pgd); - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_join(bar->bar1_vmm, bar->mem->memory); if (ret) return ret; @@ -144,45 +192,21 @@ nv50_bar_oneinit(struct nvkm_bar *base) return 0; } -int -nv50_bar_init(struct nvkm_bar *base) -{ - struct nv50_bar *bar = nv50_bar(base); - struct nvkm_device *device = bar->base.subdev.device; - int i; - - nvkm_mask(device, 0x000200, 0x00000100, 0x00000000); - nvkm_mask(device, 0x000200, 0x00000100, 0x00000100); - nvkm_wr32(device, 0x100c80, 0x00060001); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x100c80) & 0x00000001)) - break; - ) < 0) - return -EBUSY; - - nvkm_wr32(device, 0x001704, 0x00000000 | bar->mem->addr >> 12); - nvkm_wr32(device, 0x001704, 0x40000000 | bar->mem->addr >> 12); - nvkm_wr32(device, 0x001708, 0x80000000 | bar->bar1->node->offset >> 4); - nvkm_wr32(device, 0x00170c, 0x80000000 | bar->bar3->node->offset >> 4); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x001900 + (i * 4), 0x00000000); - return 0; -} - void * nv50_bar_dtor(struct nvkm_bar *base) { struct nv50_bar *bar = nv50_bar(base); - nvkm_gpuobj_del(&bar->bar1); - nvkm_vm_ref(NULL, &bar->bar1_vm, bar->pgd); - nvkm_gpuobj_del(&bar->bar3); - if (bar->bar3_vm) { - nvkm_memory_del(&bar->bar3_vm->pgt[0].mem[0]); - nvkm_vm_ref(NULL, &bar->bar3_vm, bar->pgd); + if (bar->mem) { + nvkm_gpuobj_del(&bar->bar1); + nvkm_vmm_part(bar->bar1_vmm, bar->mem->memory); + nvkm_vmm_unref(&bar->bar1_vmm); + nvkm_gpuobj_del(&bar->bar2); + nvkm_vmm_part(bar->bar2_vmm, bar->mem->memory); + nvkm_vmm_unref(&bar->bar2_vmm); + nvkm_gpuobj_del(&bar->pgd); + nvkm_gpuobj_del(&bar->pad); + nvkm_gpuobj_del(&bar->mem); } - nvkm_gpuobj_del(&bar->pgd); - nvkm_gpuobj_del(&bar->pad); - nvkm_gpuobj_del(&bar->mem); return bar; } @@ -204,8 +228,14 @@ nv50_bar_func = { .dtor = nv50_bar_dtor, .oneinit = nv50_bar_oneinit, .init = nv50_bar_init, - .kmap = nv50_bar_kmap, - .umap = nv50_bar_umap, + .bar1.init = 
nv50_bar_bar1_init, + .bar1.fini = nv50_bar_bar1_fini, + .bar1.wait = nv50_bar_bar1_wait, + .bar1.vmm = nv50_bar_bar1_vmm, + .bar2.init = nv50_bar_bar2_init, + .bar2.fini = nv50_bar_bar2_fini, + .bar2.wait = nv50_bar_bar1_wait, + .bar2.vmm = nv50_bar_bar2_vmm, .flush = nv50_bar_flush, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h index 1eb764f22a49..2fe833f6d9f7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_BAR_H__ #define __NV50_BAR_H__ #define nv50_bar(p) container_of((p), struct nv50_bar, base) @@ -9,18 +10,20 @@ struct nv50_bar { struct nvkm_gpuobj *mem; struct nvkm_gpuobj *pad; struct nvkm_gpuobj *pgd; - struct nvkm_vm *bar1_vm; + struct nvkm_vmm *bar1_vmm; struct nvkm_gpuobj *bar1; - struct nvkm_vm *bar3_vm; - struct nvkm_gpuobj *bar3; + struct nvkm_vmm *bar2_vmm; + struct nvkm_gpuobj *bar2; }; int nv50_bar_new_(const struct nvkm_bar_func *, struct nvkm_device *, int, u32 pgd_addr, struct nvkm_bar **); void *nv50_bar_dtor(struct nvkm_bar *); int nv50_bar_oneinit(struct nvkm_bar *); -int nv50_bar_init(struct nvkm_bar *); -struct nvkm_vm *nv50_bar_kmap(struct nvkm_bar *); -int nv50_bar_umap(struct nvkm_bar *, u64, int, struct nvkm_vma *); -void nv50_bar_unmap(struct nvkm_bar *, struct nvkm_vma *); +void nv50_bar_init(struct nvkm_bar *); +void nv50_bar_bar1_init(struct nvkm_bar *); +void nv50_bar_bar1_wait(struct nvkm_bar *); +struct nvkm_vmm *nv50_bar_bar1_vmm(struct nvkm_bar *); +void nv50_bar_bar2_init(struct nvkm_bar *); +struct nvkm_vmm *nv50_bar_bar2_vmm(struct nvkm_bar *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h index d834ef20db5b..01ba5b26666e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BAR_PRIV_H__ #define __NVKM_BAR_PRIV_H__ #define nvkm_bar(p) container_of((p), struct nvkm_bar, subdev) @@ -9,11 +10,25 @@ void nvkm_bar_ctor(const struct nvkm_bar_func *, struct nvkm_device *, struct nvkm_bar_func { void *(*dtor)(struct nvkm_bar *); int (*oneinit)(struct nvkm_bar *); - int (*init)(struct nvkm_bar *); - struct nvkm_vm *(*kmap)(struct nvkm_bar *); - int (*umap)(struct nvkm_bar *, u64 size, int type, struct nvkm_vma *); + void (*init)(struct nvkm_bar *); + + struct { + void (*init)(struct nvkm_bar *); + void (*fini)(struct nvkm_bar *); + void (*wait)(struct nvkm_bar *); + struct nvkm_vmm *(*vmm)(struct nvkm_bar *); + } bar1, bar2; + void (*flush)(struct nvkm_bar *); }; +void nv50_bar_bar1_fini(struct nvkm_bar *); +void nv50_bar_bar2_fini(struct nvkm_bar *); + void g84_bar_flush(struct nvkm_bar *); + +void gf100_bar_bar1_fini(struct nvkm_bar *); +void gf100_bar_bar2_fini(struct nvkm_bar *); + +void gm107_bar_bar1_wait(struct nvkm_bar *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c index 23caef8df17f..73e463ed55c3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c @@ -99,7 +99,7 @@ nvbios_iccsense_parse(struct nvkm_bios *bios, struct nvbios_iccsense *iccsense) rail->extdev_id = nvbios_rd08(bios, entry + 0x1); res_start = 0x5; break; - }; + } if (nvbios_extdev_parse(bios, rail->extdev_id, &extdev)) continue; @@ -115,7 
+115,7 @@ nvbios_iccsense_parse(struct nvkm_bios *bios, struct nvbios_iccsense *iccsense) default: rail->resistor_count = 0; break; - }; + } for (r = 0; r < rail->resistor_count; ++r) { rail->resistors[r].mohm = nvbios_rd08(bios, entry + res_start + r * 2); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c index b58ee99f7bfc..9cc10e438b3d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c @@ -36,6 +36,8 @@ #include <subdev/i2c.h> #include <subdev/vga.h> +#include <linux/kernel.h> + #define bioslog(lvl, fmt, args...) do { \ nvkm_printk(init->subdev, lvl, info, "0x%08x[%c]: "fmt, \ init->offset, init_exec(init) ? \ @@ -2271,8 +2273,6 @@ static struct nvbios_init_opcode { [0xaa] = { init_reserved }, }; -#define init_opcode_nr (sizeof(init_opcode) / sizeof(init_opcode[0])) - int nvbios_exec(struct nvbios_init *init) { @@ -2281,7 +2281,8 @@ nvbios_exec(struct nvbios_init *init) init->nested++; while (init->offset) { u8 opcode = nvbios_rd08(bios, init->offset); - if (opcode >= init_opcode_nr || !init_opcode[opcode].exec) { + if (opcode >= ARRAY_SIZE(init_opcode) || + !init_opcode[opcode].exec) { error("unknown opcode 0x%02x\n", opcode); return -EINVAL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h index 7d1d3c6b4b72..33435ca16311 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BIOS_PRIV_H__ #define __NVKM_BIOS_PRIV_H__ #define nvkm_bios(p) container_of((p), struct nvkm_bios, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c index 7e83c3985020..20ff5173cf8f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c @@ -115,16 +115,21 @@ nvbios_timingEp(struct nvkm_bios *bios, int idx, switch (min_t(u8, *hdr, 25)) { case 25: p->timing_10_24 = nvbios_rd08(bios, data + 0x18); + /* fall through */ case 24: case 23: case 22: p->timing_10_21 = nvbios_rd08(bios, data + 0x15); + /* fall through */ case 21: p->timing_10_20 = nvbios_rd08(bios, data + 0x14); + /* fall through */ case 20: p->timing_10_CWL = nvbios_rd08(bios, data + 0x13); + /* fall through */ case 19: p->timing_10_18 = nvbios_rd08(bios, data + 0x12); + /* fall through */ case 18: case 17: p->timing_10_16 = nvbios_rd08(bios, data + 0x10); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h index 54ec3b131dfd..17ac1812a928 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BUS_HWSQ_H__ #define __NVKM_BUS_HWSQ_H__ #include <subdev/bus.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/priv.h index a130f2c642d5..ef01e569352d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_BUS_PRIV_H__ #define __NVKM_BUS_PRIV_H__ #define nvkm_bus(p) container_of((p), struct nvkm_bus, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.h index 8865b59fe575..1ea886a4301f 
100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CLK_NVA3_H__ #define __NVKM_CLK_NVA3_H__ #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.h index d3c7fb6efa16..f134d979d884 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV50_CLK_H__ #define __NV50_CLK_H__ #define nv50_clk(p) container_of((p), struct nv50_clk, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/pll.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/pll.h index 44020a30dee8..9a39f1fd2976 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/pll.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/pll.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PLL_H__ #define __NVKM_PLL_H__ #include <core/os.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h index 51eafc00c8b1..b656177923fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CLK_PRIV_H__ #define __NVKM_CLK_PRIV_H__ #define nvkm_clk(p) container_of((p), struct nvkm_clk, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/seq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/seq.h index d717e8b8f679..d0715fe84328 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/seq.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/seq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_CLK_SEQ_H__ #define __NVKM_CLK_SEQ_H__ #include <subdev/bus/hwsq.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c index 158977f8a6e6..c3dae05348eb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c @@ -119,11 +119,11 @@ powerctrl_1_shift(int chip_version, int reg) switch (reg) { case 0x680520: - shift += 4; + shift += 4; /* fall through */ case 0x680508: - shift += 4; + shift += 4; /* fall through */ case 0x680504: - shift += 4; + shift += 4; /* fall through */ case 0x680500: shift += 4; } @@ -245,11 +245,11 @@ setPLL_double_highregs(struct nvkm_devinit *init, u32 reg1, switch (reg1) { case 0x680504: - shift_c040 += 2; + shift_c040 += 2; /* fall through */ case 0x680500: - shift_c040 += 2; + shift_c040 += 2; /* fall through */ case 0x680520: - shift_c040 += 2; + shift_c040 += 2; /* fall through */ case 0x680508: shift_c040 += 2; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h index 4a87c8c2bce8..b18e49847eee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV04_DEVINIT_H__ #define __NV04_DEVINIT_H__ #define nv04_devinit(p) container_of((p), struct nv04_devinit, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h index 25d2ae3af1c6..315ebaff1165 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ 
#ifndef __NV50_DEVINIT_H__ #define __NV50_DEVINIT_H__ #define nv50_devinit(p) container_of((p), struct nv50_devinit, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h index e1f6ae58f1d3..5b3097a586dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_DEVINIT_PRIV_H__ #define __NVKM_DEVINIT_PRIV_H__ #define nvkm_devinit(p) container_of((p), struct nvkm_devinit, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c index a7049c041594..73b5d46104bd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c @@ -31,12 +31,6 @@ #include <engine/gr.h> #include <engine/mpeg.h> -bool -nvkm_fb_memtype_valid(struct nvkm_fb *fb, u32 memtype) -{ - return fb->func->memtype_valid(fb, memtype); -} - void nvkm_fb_tile_fini(struct nvkm_fb *fb, int region, struct nvkm_fb_tile *tile) { @@ -100,6 +94,7 @@ static int nvkm_fb_oneinit(struct nvkm_subdev *subdev) { struct nvkm_fb *fb = nvkm_fb(subdev); + u32 tags = 0; if (fb->func->ram_new) { int ret = fb->func->ram_new(fb, &fb->ram); @@ -115,7 +110,16 @@ nvkm_fb_oneinit(struct nvkm_subdev *subdev) return ret; } - return 0; + /* Initialise compression tag allocator. + * + * LTC oneinit() will override this on Fermi and newer. + */ + if (fb->func->tags) { + tags = fb->func->tags(fb); + nvkm_debug(subdev, "%d comptags\n", tags); + } + + return nvkm_mm_init(&fb->tags, 0, 0, tags, 1); } static int @@ -135,8 +139,13 @@ nvkm_fb_init(struct nvkm_subdev *subdev) if (fb->func->init) fb->func->init(fb); - if (fb->func->init_page) - fb->func->init_page(fb); + + if (fb->func->init_page) { + ret = fb->func->init_page(fb); + if (WARN_ON(ret)) + return ret; + } + if (fb->func->init_unkn) fb->func->init_unkn(fb); return 0; @@ -148,12 +157,13 @@ nvkm_fb_dtor(struct nvkm_subdev *subdev) struct nvkm_fb *fb = nvkm_fb(subdev); int i; - nvkm_memory_del(&fb->mmu_wr); - nvkm_memory_del(&fb->mmu_rd); + nvkm_memory_unref(&fb->mmu_wr); + nvkm_memory_unref(&fb->mmu_rd); for (i = 0; i < fb->tile.regions; i++) fb->func->tile.fini(fb, i, &fb->tile.region[i]); + nvkm_mm_fini(&fb->tags); nvkm_ram_del(&fb->ram); if (fb->func->dtor) @@ -176,7 +186,8 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device, nvkm_subdev_ctor(&nvkm_fb, device, index, &fb->subdev); fb->func = func; fb->tile.regions = fb->func->tile.regions; - fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", 0); + fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", + fb->func->default_bigpage); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c index 9c28392d07e4..06bf95c0c549 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c @@ -27,6 +27,7 @@ static const struct nv50_fb_func g84_fb = { .ram_new = nv50_ram_new, + .tags = nv20_fb_tags, .trap = 0x001d07ff, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c index a239e73562c8..47d28c279707 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c @@ -27,15 +27,6 @@ #include <core/memory.h> #include <core/option.h> -extern const u8 gf100_pte_storage_type_map[256]; - -bool -gf100_fb_memtype_valid(struct nvkm_fb 
*fb, u32 tile_flags) -{ - u8 memtype = (tile_flags & 0x0000ff00) >> 8; - return likely((gf100_pte_storage_type_map[memtype] != 0xff)); -} - void gf100_fb_intr(struct nvkm_fb *base) { @@ -80,20 +71,17 @@ gf100_fb_oneinit(struct nvkm_fb *base) return 0; } -void +int gf100_fb_init_page(struct nvkm_fb *fb) { struct nvkm_device *device = fb->subdev.device; switch (fb->page) { - case 16: - nvkm_mask(device, 0x100c80, 0x00000001, 0x00000001); - break; - case 17: + case 16: nvkm_mask(device, 0x100c80, 0x00000001, 0x00000001); break; + case 17: nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); break; default: - nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); - fb->page = 17; - break; + return -EINVAL; } + return 0; } void @@ -143,7 +131,7 @@ gf100_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gf100_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h index 412eb89834e8..ab261310753a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_RAM_NVC0_H__ #define __NVKM_RAM_NVC0_H__ #define gf100_fb(p) container_of((p), struct gf100_fb, base) @@ -17,7 +18,5 @@ void gf100_fb_intr(struct nvkm_fb *); void gp100_fb_init(struct nvkm_fb *); -void gm200_fb_init_page(struct nvkm_fb *fb); void gm200_fb_init(struct nvkm_fb *base); - #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c index 56af84aa333b..4a9f463745b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c @@ -32,7 +32,7 @@ gf108_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gf108_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c index 4245e2e6e604..0a6e8eaad42c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c @@ -32,7 +32,7 @@ gk104_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gk104_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c index 5d34d6136616..a7e29b125094 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c @@ -30,7 +30,7 @@ gk20a_fb = { .init = gf100_fb_init, .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c index db699025f546..69c876d5d1c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c @@ -32,7 +32,7 @@ gm107_fb = { .init_page = gf100_fb_init_page, .intr = gf100_fb_intr, .ram_new = gm107_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 17, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c index d83da5ddbc1e..8137e19d3292 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c +++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c @@ -26,22 +26,18 @@ #include <core/memory.h> -void +int gm200_fb_init_page(struct nvkm_fb *fb) { struct nvkm_device *device = fb->subdev.device; switch (fb->page) { - case 16: - nvkm_mask(device, 0x100c80, 0x00000801, 0x00000001); - break; - case 17: - nvkm_mask(device, 0x100c80, 0x00000801, 0x00000000); - break; + case 16: nvkm_mask(device, 0x100c80, 0x00001801, 0x00001001); break; + case 17: nvkm_mask(device, 0x100c80, 0x00001801, 0x00000000); break; + case 0: nvkm_mask(device, 0x100c80, 0x00001800, 0x00001800); break; default: - nvkm_mask(device, 0x100c80, 0x00000800, 0x00000800); - fb->page = 0; - break; + return -EINVAL; } + return 0; } void @@ -69,7 +65,7 @@ gm200_fb = { .init_page = gm200_fb_init_page, .intr = gf100_fb_intr, .ram_new = gm200_ram_new, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 0 /* per-instance. */, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c index b87c233bcd6d..12db61e31128 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c @@ -30,7 +30,7 @@ gm20b_fb = { .init = gm200_fb_init, .init_page = gm200_fb_init_page, .intr = gf100_fb_intr, - .memtype_valid = gf100_fb_memtype_valid, + .default_bigpage = 0 /* per-instance. */, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c index 98474aec1921..147f69b30cd8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c @@ -59,7 +59,6 @@ gp100_fb = { .init_page = gm200_fb_init_page, .init_unkn = gp100_fb_init_unkn, .ram_new = gp100_ram_new, - .memtype_valid = gf100_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c index 73b4ae1c73dc..b84b9861ef26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c @@ -33,7 +33,6 @@ gp102_fb = { .init = gp100_fb_init, .init_page = gm200_fb_init_page, .ram_new = gp100_ram_new, - .memtype_valid = gf100_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c index f2b1fbf428d5..af8e43979dc1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c @@ -28,7 +28,6 @@ gp10b_fb = { .init = gm200_fb_init, .init_page = gm200_fb_init_page, .intr = gf100_fb_intr, - .memtype_valid = gf100_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c index ebb30608d5ef..9266559b45f9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c @@ -27,6 +27,7 @@ static const struct nv50_fb_func gt215_fb = { .ram_new = gt215_ram_new, + .tags = nv20_fb_tags, .trap = 0x000d0fff, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c index 8ff2e5db4571..c886664533c8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c @@ -25,14 +25,6 @@ #include "ram.h" #include "regsnv04.h" -bool -nv04_fb_memtype_valid(struct nvkm_fb *fb, u32 tile_flags) -{ - if (!(tile_flags & 0xff00)) - return true; - return false; -} - static void nv04_fb_init(struct nvkm_fb *fb) { @@ -49,7 +41,6 @@ static const struct 
nvkm_fb_func nv04_fb = { .init = nv04_fb_init, .ram_new = nv04_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c index e8c44f5a3d84..c998b7e96aa3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c @@ -61,7 +61,6 @@ nv10_fb = { .tile.fini = nv10_fb_tile_fini, .tile.prog = nv10_fb_tile_prog, .ram_new = nv10_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c index 2ae0beb87567..7b9f04f44af8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c @@ -33,7 +33,6 @@ nv1a_fb = { .tile.fini = nv10_fb_tile_fini, .tile.prog = nv10_fb_tile_prog, .ram_new = nv1a_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c index 126865dfe777..a021d21ff153 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c @@ -45,7 +45,7 @@ nv20_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (!(flags & 2)) tile->zcomp = 0x00000000; /* Z16 */ else tile->zcomp = 0x04000000; /* Z24S8 */ tile->zcomp |= tile->tag->offset; @@ -63,7 +63,7 @@ nv20_fb_tile_fini(struct nvkm_fb *fb, int i, struct nvkm_fb_tile *tile) tile->limit = 0; tile->pitch = 0; tile->zcomp = 0; - nvkm_mm_free(&fb->ram->tags, &tile->tag); + nvkm_mm_free(&fb->tags, &tile->tag); } void @@ -77,15 +77,22 @@ nv20_fb_tile_prog(struct nvkm_fb *fb, int i, struct nvkm_fb_tile *tile) nvkm_wr32(device, 0x100300 + (i * 0x04), tile->zcomp); } +u32 +nv20_fb_tags(struct nvkm_fb *fb) +{ + const u32 tags = nvkm_rd32(fb->subdev.device, 0x100320); + return tags ? 
tags + 1 : 0; +} + static const struct nvkm_fb_func nv20_fb = { + .tags = nv20_fb_tags, .tile.regions = 8, .tile.init = nv20_fb_tile_init, .tile.comp = nv20_fb_tile_comp, .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c index c56746d2a502..7709f5fe9a45 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c @@ -32,7 +32,7 @@ nv25_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (!(flags & 2)) tile->zcomp = 0x00100000; /* Z16 */ else tile->zcomp = 0x00200000; /* Z24S8 */ tile->zcomp |= tile->tag->offset; @@ -44,13 +44,13 @@ nv25_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, static const struct nvkm_fb_func nv25_fb = { + .tags = nv20_fb_tags, .tile.regions = 8, .tile.init = nv20_fb_tile_init, .tile.comp = nv25_fb_tile_comp, .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c index 2a7c4831b821..8aa782666507 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c @@ -51,7 +51,7 @@ nv30_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (flags & 2) tile->zcomp |= 0x01000000; /* Z16 */ else tile->zcomp |= 0x02000000; /* Z24S8 */ tile->zcomp |= ((tile->tag->offset ) >> 6); @@ -116,6 +116,7 @@ nv30_fb_init(struct nvkm_fb *fb) static const struct nvkm_fb_func nv30_fb = { + .tags = nv20_fb_tags, .init = nv30_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -123,7 +124,6 @@ nv30_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c index 1604b3789ad1..6e83dcff72e0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c @@ -32,7 +32,7 @@ nv35_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (flags & 2) tile->zcomp |= 0x04000000; /* Z16 */ else tile->zcomp |= 0x08000000; /* Z24S8 */ tile->zcomp |= ((tile->tag->offset ) >> 6); @@ -45,6 +45,7 @@ nv35_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, static const struct nvkm_fb_func nv35_fb = { + .tags = nv20_fb_tags, .init = nv30_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -52,7 +53,6 @@ nv35_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff 
--git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c index 80cc0a6e3416..2a07617bb44c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c @@ -32,7 +32,7 @@ nv36_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, { u32 tiles = DIV_ROUND_UP(size, 0x40); u32 tags = round_up(tiles / fb->ram->parts, 0x40); - if (!nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + if (!nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { if (flags & 2) tile->zcomp |= 0x10000000; /* Z16 */ else tile->zcomp |= 0x20000000; /* Z24S8 */ tile->zcomp |= ((tile->tag->offset ) >> 6); @@ -45,6 +45,7 @@ nv36_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, static const struct nvkm_fb_func nv36_fb = { + .tags = nv20_fb_tags, .init = nv30_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -52,7 +53,6 @@ nv36_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv20_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c index deec46a310f8..955160778b5b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c @@ -33,7 +33,7 @@ nv40_fb_tile_comp(struct nvkm_fb *fb, int i, u32 size, u32 flags, u32 tiles = DIV_ROUND_UP(size, 0x80); u32 tags = round_up(tiles / fb->ram->parts, 0x100); if ( (flags & 2) && - !nvkm_mm_head(&fb->ram->tags, 0, 1, tags, tags, 1, &tile->tag)) { + !nvkm_mm_head(&fb->tags, 0, 1, tags, tags, 1, &tile->tag)) { tile->zcomp = 0x28000000; /* Z24S8_SPLIT_GRAD */ tile->zcomp |= ((tile->tag->offset ) >> 8); tile->zcomp |= ((tile->tag->offset + tags - 1) >> 8) << 13; @@ -51,6 +51,7 @@ nv40_fb_init(struct nvkm_fb *fb) static const struct nvkm_fb_func nv40_fb = { + .tags = nv20_fb_tags, .init = nv40_fb_init, .tile.regions = 8, .tile.init = nv30_fb_tile_init, @@ -58,7 +59,6 @@ nv40_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv20_fb_tile_prog, .ram_new = nv40_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c index 79e57dd5a00f..b77f08d34cc3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c @@ -45,6 +45,7 @@ nv41_fb_init(struct nvkm_fb *fb) static const struct nvkm_fb_func nv41_fb = { + .tags = nv20_fb_tags, .init = nv41_fb_init, .tile.regions = 12, .tile.init = nv30_fb_tile_init, @@ -52,7 +53,6 @@ nv41_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv41_fb_tile_prog, .ram_new = nv41_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c index 06246cce5ec4..b59dc486083d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c @@ -62,7 +62,6 @@ nv44_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv44_fb_tile_prog, .ram_new = nv44_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c index 3598a1aa65be..cab7d20fa039 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c @@ -48,7 +48,6 @@ nv46_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = 
nv44_fb_tile_prog, .ram_new = nv44_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c index c505e4429314..a8b0ad4c871d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c @@ -28,6 +28,7 @@ static const struct nvkm_fb_func nv47_fb = { + .tags = nv20_fb_tags, .init = nv41_fb_init, .tile.regions = 15, .tile.init = nv30_fb_tile_init, @@ -35,7 +36,6 @@ nv47_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv41_fb_tile_prog, .ram_new = nv41_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c index 7b91b9f170e5..d0b317bb0252 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c @@ -28,6 +28,7 @@ static const struct nvkm_fb_func nv49_fb = { + .tags = nv20_fb_tags, .init = nv41_fb_init, .tile.regions = 15, .tile.init = nv30_fb_tile_init, @@ -35,7 +36,6 @@ nv49_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv41_fb_tile_prog, .ram_new = nv49_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c index 4e98210c1b1c..6a6f0c086071 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c @@ -34,7 +34,6 @@ nv4e_fb = { .tile.fini = nv20_fb_tile_fini, .tile.prog = nv44_fb_tile_prog, .ram_new = nv44_ram_new, - .memtype_valid = nv04_fb_memtype_valid, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c index 0595e0722bfc..b2f5bf8144ea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c @@ -28,18 +28,6 @@ #include <core/enum.h> #include <engine/fifo.h> -int -nv50_fb_memtype[0x80] = { - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 0, - 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 2, 2, 2, - 1, 0, 2, 0, 1, 0, 2, 0, 1, 1, 2, 2, 1, 1, 0, 0 -}; - static int nv50_fb_ram_new(struct nvkm_fb *base, struct nvkm_ram **pram) { @@ -47,12 +35,6 @@ nv50_fb_ram_new(struct nvkm_fb *base, struct nvkm_ram **pram) return fb->func->ram_new(&fb->base, pram); } -static bool -nv50_fb_memtype_valid(struct nvkm_fb *fb, u32 memtype) -{ - return nv50_fb_memtype[(memtype & 0xff00) >> 8] != 0; -} - static const struct nvkm_enum vm_dispatch_subclients[] = { { 0x00000000, "GRCTX" }, { 0x00000001, "NOTIFY" }, @@ -244,6 +226,15 @@ nv50_fb_init(struct nvkm_fb *base) nvkm_wr32(device, 0x100c90, fb->func->trap); } +static u32 +nv50_fb_tags(struct nvkm_fb *base) +{ + struct nv50_fb *fb = nv50_fb(base); + if (fb->func->tags) + return fb->func->tags(&fb->base); + return 0; +} + static void * nv50_fb_dtor(struct nvkm_fb *base) { @@ -262,11 +253,11 @@ nv50_fb_dtor(struct nvkm_fb *base) static const struct nvkm_fb_func nv50_fb_ = { .dtor = nv50_fb_dtor, + .tags = nv50_fb_tags, .oneinit = nv50_fb_oneinit, .init = nv50_fb_init, .intr = nv50_fb_intr, .ram_new = nv50_fb_ram_new, - .memtype_valid = nv50_fb_memtype_valid, }; int @@ -287,6 +278,7 @@ nv50_fb_new_(const struct nv50_fb_func *func, 
struct nvkm_device *device, static const struct nv50_fb_func nv50_fb = { .ram_new = nv50_ram_new, + .tags = nv20_fb_tags, .trap = 0x000707ff, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h index faa88c8c66fe..dacc696387b6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FB_NV50_H__ #define __NVKM_FB_NV50_H__ #define nv50_fb(p) container_of((p), struct nv50_fb, base) @@ -12,10 +13,10 @@ struct nv50_fb { struct nv50_fb_func { int (*ram_new)(struct nvkm_fb *, struct nvkm_ram **); + u32 (*tags)(struct nvkm_fb *); u32 trap; }; int nv50_fb_new_(const struct nv50_fb_func *, struct nvkm_device *, int index, struct nvkm_fb **pfb); -extern int nv50_fb_memtype[0x80]; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h index e905d44fa1d5..9351188d5d76 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FB_PRIV_H__ #define __NVKM_FB_PRIV_H__ #define nvkm_fb(p) container_of((p), struct nvkm_fb, subdev) @@ -6,9 +7,10 @@ struct nvkm_bios; struct nvkm_fb_func { void *(*dtor)(struct nvkm_fb *); + u32 (*tags)(struct nvkm_fb *); int (*oneinit)(struct nvkm_fb *); void (*init)(struct nvkm_fb *); - void (*init_page)(struct nvkm_fb *); + int (*init_page)(struct nvkm_fb *); void (*init_unkn)(struct nvkm_fb *); void (*intr)(struct nvkm_fb *); @@ -24,7 +26,7 @@ struct nvkm_fb_func { int (*ram_new)(struct nvkm_fb *, struct nvkm_ram **); - bool (*memtype_valid)(struct nvkm_fb *, u32 memtype); + u8 default_bigpage; }; void nvkm_fb_ctor(const struct nvkm_fb_func *, struct nvkm_device *device, @@ -33,13 +35,12 @@ int nvkm_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *device, int index, struct nvkm_fb **); int nvkm_fb_bios_memtype(struct nvkm_bios *); -bool nv04_fb_memtype_valid(struct nvkm_fb *, u32 memtype); - void nv10_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); void nv10_fb_tile_fini(struct nvkm_fb *, int i, struct nvkm_fb_tile *); void nv10_fb_tile_prog(struct nvkm_fb *, int, struct nvkm_fb_tile *); +u32 nv20_fb_tags(struct nvkm_fb *); void nv20_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); void nv20_fb_tile_fini(struct nvkm_fb *, int i, struct nvkm_fb_tile *); @@ -62,8 +63,7 @@ void nv46_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size, u32 pitch, u32 flags, struct nvkm_fb_tile *); int gf100_fb_oneinit(struct nvkm_fb *); -void gf100_fb_init_page(struct nvkm_fb *); -bool gf100_fb_memtype_valid(struct nvkm_fb *, u32); +int gf100_fb_init_page(struct nvkm_fb *); -void gm200_fb_init_page(struct nvkm_fb *); +int gm200_fb_init_page(struct nvkm_fb *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c index c17d559dbfbe..24c7bd505731 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c @@ -21,8 +21,132 @@ * * Authors: Ben Skeggs <bskeggs@redhat.com> */ +#define nvkm_vram(p) container_of((p), struct nvkm_vram, memory) #include "ram.h" +#include <core/memory.h> +#include <subdev/mmu.h> + +struct nvkm_vram { + struct nvkm_memory memory; + struct nvkm_ram *ram; + u8 page; + struct nvkm_mm_node *mn; 
+}; + +static int +nvkm_vram_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + struct nvkm_vram *vram = nvkm_vram(memory); + struct nvkm_vmm_map map = { + .memory = &vram->memory, + .offset = offset, + .mem = vram->mn, + }; + + return nvkm_vmm_map(vmm, vma, argv, argc, &map); +} + +static u64 +nvkm_vram_size(struct nvkm_memory *memory) +{ + return (u64)nvkm_mm_size(nvkm_vram(memory)->mn) << NVKM_RAM_MM_SHIFT; +} + +static u64 +nvkm_vram_addr(struct nvkm_memory *memory) +{ + struct nvkm_vram *vram = nvkm_vram(memory); + if (!nvkm_mm_contiguous(vram->mn)) + return ~0ULL; + return (u64)nvkm_mm_addr(vram->mn) << NVKM_RAM_MM_SHIFT; +} + +static u8 +nvkm_vram_page(struct nvkm_memory *memory) +{ + return nvkm_vram(memory)->page; +} + +static enum nvkm_memory_target +nvkm_vram_target(struct nvkm_memory *memory) +{ + return NVKM_MEM_TARGET_VRAM; +} + +static void * +nvkm_vram_dtor(struct nvkm_memory *memory) +{ + struct nvkm_vram *vram = nvkm_vram(memory); + struct nvkm_mm_node *next = vram->mn; + struct nvkm_mm_node *node; + mutex_lock(&vram->ram->fb->subdev.mutex); + while ((node = next)) { + next = node->next; + nvkm_mm_free(&vram->ram->vram, &node); + } + mutex_unlock(&vram->ram->fb->subdev.mutex); + return vram; +} + +static const struct nvkm_memory_func +nvkm_vram = { + .dtor = nvkm_vram_dtor, + .target = nvkm_vram_target, + .page = nvkm_vram_page, + .addr = nvkm_vram_addr, + .size = nvkm_vram_size, + .map = nvkm_vram_map, +}; + +int +nvkm_ram_get(struct nvkm_device *device, u8 heap, u8 type, u8 rpage, u64 size, + bool contig, bool back, struct nvkm_memory **pmemory) +{ + struct nvkm_ram *ram; + struct nvkm_mm *mm; + struct nvkm_mm_node **node, *r; + struct nvkm_vram *vram; + u8 page = max(rpage, (u8)NVKM_RAM_MM_SHIFT); + u32 align = (1 << page) >> NVKM_RAM_MM_SHIFT; + u32 max = ALIGN(size, 1 << page) >> NVKM_RAM_MM_SHIFT; + u32 min = contig ? 
max : align; + int ret; + + if (!device->fb || !(ram = device->fb->ram)) + return -ENODEV; + ram = device->fb->ram; + mm = &ram->vram; + + if (!(vram = kzalloc(sizeof(*vram), GFP_KERNEL))) + return -ENOMEM; + nvkm_memory_ctor(&nvkm_vram, &vram->memory); + vram->ram = ram; + vram->page = page; + *pmemory = &vram->memory; + + mutex_lock(&ram->fb->subdev.mutex); + node = &vram->mn; + do { + if (back) + ret = nvkm_mm_tail(mm, heap, type, max, min, align, &r); + else + ret = nvkm_mm_head(mm, heap, type, max, min, align, &r); + if (ret) { + mutex_unlock(&ram->fb->subdev.mutex); + nvkm_memory_unref(pmemory); + return ret; + } + + *node = r; + node = &r->next; + max -= r->length; + } while (max); + mutex_unlock(&ram->fb->subdev.mutex); + return 0; +} + int nvkm_ram_init(struct nvkm_ram *ram) { @@ -38,7 +162,6 @@ nvkm_ram_del(struct nvkm_ram **pram) if (ram && !WARN_ON(!ram->func)) { if (ram->func->dtor) *pram = ram->func->dtor(ram); - nvkm_mm_fini(&ram->tags); nvkm_mm_fini(&ram->vram); kfree(*pram); *pram = NULL; @@ -47,8 +170,7 @@ nvkm_ram_del(struct nvkm_ram **pram) int nvkm_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, - enum nvkm_ram_type type, u64 size, u32 tags, - struct nvkm_ram *ram) + enum nvkm_ram_type type, u64 size, struct nvkm_ram *ram) { static const char *name[] = { [NVKM_RAM_TYPE_UNKNOWN] = "of unknown memory type", @@ -73,28 +195,20 @@ nvkm_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, ram->size = size; if (!nvkm_mm_initialised(&ram->vram)) { - ret = nvkm_mm_init(&ram->vram, 0, size >> NVKM_RAM_MM_SHIFT, 1); + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, 0, + size >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; } - if (!nvkm_mm_initialised(&ram->tags)) { - ret = nvkm_mm_init(&ram->tags, 0, tags ? ++tags : 0, 1); - if (ret) - return ret; - - nvkm_debug(subdev, "%d compression tags\n", tags); - } - return 0; } int nvkm_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb, - enum nvkm_ram_type type, u64 size, u32 tags, - struct nvkm_ram **pram) + enum nvkm_ram_type type, u64 size, struct nvkm_ram **pram) { if (!(*pram = kzalloc(sizeof(**pram), GFP_KERNEL))) return -ENOMEM; - return nvkm_ram_ctor(func, fb, type, size, tags, *pram); + return nvkm_ram_ctor(func, fb, type, size, *pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index fac7e73c3ddf..330132e95b6f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -1,13 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FB_RAM_PRIV_H__ #define __NVKM_FB_RAM_PRIV_H__ #include "priv.h" int nvkm_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *, - enum nvkm_ram_type, u64 size, u32 tags, - struct nvkm_ram *); + enum nvkm_ram_type, u64 size, struct nvkm_ram *); int nvkm_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, - enum nvkm_ram_type, u64 size, u32 tags, - struct nvkm_ram **); + enum nvkm_ram_type, u64 size, struct nvkm_ram **); void nvkm_ram_del(struct nvkm_ram **); int nvkm_ram_init(struct nvkm_ram *); @@ -15,9 +14,6 @@ extern const struct nvkm_ram_func nv04_ram_func; int nv50_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *, struct nvkm_ram *); -int nv50_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **); -void nv50_ram_put(struct nvkm_ram *, struct nvkm_mem **); -void __nv50_ram_put(struct nvkm_ram *, struct nvkm_mem *); int gf100_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, struct nvkm_ram **); @@ -28,8 
+24,6 @@ u32 gf100_ram_probe_fbp(const struct nvkm_ram_func *, u32 gf100_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32, struct nvkm_device *, int, int *); u32 gf100_ram_probe_fbpa_amount(struct nvkm_device *, int); -int gf100_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **); -void gf100_ram_put(struct nvkm_ram *, struct nvkm_mem **); int gf100_ram_init(struct nvkm_ram *); int gf100_ram_calc(struct nvkm_ram *, u32); int gf100_ram_prog(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramfuc.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramfuc.h index 9ef9d6aa3721..a65fa5586af8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramfuc.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramfuc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FBRAM_FUC_H__ #define __NVKM_FBRAM_FUC_H__ #include <subdev/fb.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c index 4a9bd4f1cb93..ac87a3b6b7c9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c @@ -32,7 +32,6 @@ #include <subdev/bios/timing.h> #include <subdev/clk.h> #include <subdev/clk/pll.h> -#include <subdev/ltc.h> struct gf100_ramfuc { struct ramfuc base; @@ -420,86 +419,6 @@ gf100_ram_tidy(struct nvkm_ram *base) ram_exec(&ram->fuc, false); } -void -gf100_ram_put(struct nvkm_ram *ram, struct nvkm_mem **pmem) -{ - struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc; - struct nvkm_mem *mem = *pmem; - - *pmem = NULL; - if (unlikely(mem == NULL)) - return; - - mutex_lock(&ram->fb->subdev.mutex); - if (mem->tag) - nvkm_ltc_tags_free(ltc, &mem->tag); - __nv50_ram_put(ram, mem); - mutex_unlock(&ram->fb->subdev.mutex); - - kfree(mem); -} - -int -gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, - u32 memtype, struct nvkm_mem **pmem) -{ - struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc; - struct nvkm_mm *mm = &ram->vram; - struct nvkm_mm_node **node, *r; - struct nvkm_mem *mem; - int type = (memtype & 0x0ff); - int back = (memtype & 0x800); - const bool comp = gf100_pte_storage_type_map[type] != type; - int ret; - - size >>= NVKM_RAM_MM_SHIFT; - align >>= NVKM_RAM_MM_SHIFT; - ncmin >>= NVKM_RAM_MM_SHIFT; - if (!ncmin) - ncmin = size; - - mem = kzalloc(sizeof(*mem), GFP_KERNEL); - if (!mem) - return -ENOMEM; - - mem->size = size; - - mutex_lock(&ram->fb->subdev.mutex); - if (comp) { - /* compression only works with lpages */ - if (align == (1 << (17 - NVKM_RAM_MM_SHIFT))) { - int n = size >> 5; - nvkm_ltc_tags_alloc(ltc, n, &mem->tag); - } - - if (unlikely(!mem->tag)) - type = gf100_pte_storage_type_map[type]; - } - mem->memtype = type; - - node = &mem->mem; - do { - if (back) - ret = nvkm_mm_tail(mm, 0, 1, size, ncmin, align, &r); - else - ret = nvkm_mm_head(mm, 0, 1, size, ncmin, align, &r); - if (ret) { - mutex_unlock(&ram->fb->subdev.mutex); - ram->func->put(ram, &mem); - return ret; - } - - *node = r; - node = &r->next; - size -= r->length; - } while (size); - mutex_unlock(&ram->fb->subdev.mutex); - - mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT; - *pmem = mem; - return 0; -} - int gf100_ram_init(struct nvkm_ram *base) { @@ -604,7 +523,7 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, nvkm_debug(subdev, "Upper: %4lld MiB @ %010llx\n", usize >> 20, ubase); nvkm_debug(subdev, "Total: %4lld MiB\n", total >> 20); - ret = nvkm_ram_ctor(func, fb, type, total, 0, ram); + ret = nvkm_ram_ctor(func, fb, type, 
total, ram); if (ret) return ret; @@ -617,7 +536,8 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, */ if (lower != total) { /* The common memory amount is addressed normally. */ - ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (lower - rsvd_head) >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; @@ -625,13 +545,15 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, /* And the rest is much higher in the physical address * space, and may not be usable for certain operations. */ - ret = nvkm_mm_init(&ram->vram, ubase >> NVKM_RAM_MM_SHIFT, + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_MIXED, + ubase >> NVKM_RAM_MM_SHIFT, (usize - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; } else { /* GPUs without mixed-memory are a lot nicer... */ - ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + ret = nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (total - rsvd_head - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); if (ret) @@ -738,8 +660,6 @@ gf100_ram = { .probe_fbp_amount = gf100_ram_probe_fbp_amount, .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .init = gf100_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gf100_ram_calc, .prog = gf100_ram_prog, .tidy = gf100_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c index 985ec64cf369..70a06e3cd55a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c @@ -48,8 +48,6 @@ gf108_ram = { .probe_fbp_amount = gf108_ram_probe_fbp_amount, .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .init = gf100_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gf100_ram_calc, .prog = gf100_ram_prog, .tidy = gf100_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c index 75814f15eb53..8bcb7e79a0cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c @@ -1704,8 +1704,6 @@ gk104_ram = { .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gk104_ram_calc, .prog = gk104_ram_prog, .tidy = gk104_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c index 3f0b56347291..27c68e3f9772 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c @@ -39,8 +39,6 @@ gm107_ram = { .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gk104_ram_calc, .prog = gk104_ram_prog, .tidy = gk104_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c index fd8facf90476..6b0cac1fe7b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c @@ -54,8 +54,6 @@ gm200_ram = { .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, .calc = gk104_ram_calc, .prog = gk104_ram_prog, .tidy = gk104_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c index df8a87333b67..adb62a6beb63 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c @@ -84,8 +84,6 @@ gp100_ram = { .probe_fbp_amount = gm200_ram_probe_fbp_amount, .probe_fbpa_amount = gp100_ram_probe_fbpa, .init = gp100_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c index f10664372161..920b3d347803 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c @@ -26,6 +26,7 @@ #include "ram.h" #include "ramfuc.h" +#include <core/memory.h> #include <core/option.h> #include <subdev/bios.h> #include <subdev/bios/M0205.h> @@ -86,7 +87,7 @@ struct gt215_ltrain { u32 r_100720; u32 r_1111e0; u32 r_111400; - struct nvkm_mem *mem; + struct nvkm_memory *memory; }; struct gt215_ram { @@ -279,10 +280,10 @@ gt215_link_train_init(struct gt215_ram *ram) struct gt215_ltrain *train = &ram->ltrain; struct nvkm_device *device = ram->base.fb->subdev.device; struct nvkm_bios *bios = device->bios; - struct nvkm_mem *mem; struct nvbios_M0205E M0205E; u8 ver, hdr, cnt, len; u32 r001700; + u64 addr; int ret, i = 0; train->state = NVA3_TRAIN_UNSUPPORTED; @@ -297,14 +298,14 @@ gt215_link_train_init(struct gt215_ram *ram) train->state = NVA3_TRAIN_ONCE; - ret = ram->base.func->get(&ram->base, 0x8000, 0x10000, 0, 0x800, - &ram->ltrain.mem); + ret = nvkm_ram_get(device, NVKM_RAM_MM_NORMAL, 0x01, 16, 0x8000, + true, true, &ram->ltrain.memory); if (ret) return ret; - mem = ram->ltrain.mem; + addr = nvkm_memory_addr(ram->ltrain.memory); - nvkm_wr32(device, 0x100538, 0x10000000 | (mem->offset >> 16)); + nvkm_wr32(device, 0x100538, 0x10000000 | (addr >> 16)); nvkm_wr32(device, 0x1005a8, 0x0000ffff); nvkm_mask(device, 0x10f800, 0x00000001, 0x00000001); @@ -320,7 +321,7 @@ gt215_link_train_init(struct gt215_ram *ram) /* And upload the pattern */ r001700 = nvkm_rd32(device, 0x1700); - nvkm_wr32(device, 0x1700, mem->offset >> 16); + nvkm_wr32(device, 0x1700, addr >> 16); for (i = 0; i < 16; i++) nvkm_wr32(device, 0x700000 + (i << 2), pattern[i]); for (i = 0; i < 16; i++) @@ -336,8 +337,7 @@ gt215_link_train_init(struct gt215_ram *ram) static void gt215_link_train_fini(struct gt215_ram *ram) { - if (ram->ltrain.mem) - ram->base.func->put(&ram->base, &ram->ltrain.mem); + nvkm_memory_unref(&ram->ltrain.memory); } /* @@ -931,8 +931,6 @@ static const struct nvkm_ram_func gt215_ram_func = { .dtor = gt215_ram_dtor, .init = gt215_ram_init, - .get = nv50_ram_get, - .put = nv50_ram_put, .calc = gt215_ram_calc, .prog = gt215_ram_prog, .tidy = gt215_ram_tidy, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c index 017a91de74a0..7de18e53ef45 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c @@ -53,8 +53,6 @@ mcp77_ram_init(struct nvkm_ram *base) static const struct nvkm_ram_func mcp77_ram_func = { .init = mcp77_ram_init, - .get = nv50_ram_get, - .put = nv50_ram_put, }; int @@ -73,7 +71,7 @@ mcp77_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) *pram = &ram->base; ret = nvkm_ram_ctor(&mcp77_ram_func, fb, NVKM_RAM_TYPE_STOLEN, - size, 0, &ram->base); + size, &ram->base); if (ret) return ret; @@ -81,7 +79,8 @@ mcp77_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) ram->base.stolen = base; 
nvkm_mm_fini(&ram->base.vram); - return nvkm_mm_init(&ram->base.vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + return nvkm_mm_init(&ram->base.vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (size - rsvd_head - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c index 6f053a03d61c..cc764a93f1a3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c @@ -61,5 +61,5 @@ nv04_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) else type = NVKM_RAM_TYPE_SDRAM; - return nvkm_ram_new_(&nv04_ram_func, fb, type, size, 0, pram); + return nvkm_ram_new_(&nv04_ram_func, fb, type, size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c index dfd155c98dbb..afe54e323b18 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c @@ -36,5 +36,5 @@ nv10_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) else type = NVKM_RAM_TYPE_SDRAM; - return nvkm_ram_new_(&nv04_ram_func, fb, type, size, 0, pram); + return nvkm_ram_new_(&nv04_ram_func, fb, type, size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c index 3c6a8710e812..4c07d10bb976 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c @@ -44,5 +44,5 @@ nv1a_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) } return nvkm_ram_new_(&nv04_ram_func, fb, NVKM_RAM_TYPE_STOLEN, - mib * 1024 * 1024, 0, pram); + mib * 1024 * 1024, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c index 747e47c10cc7..71d63d7daa75 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c @@ -29,7 +29,6 @@ nv20_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) struct nvkm_device *device = fb->subdev.device; u32 pbus1218 = nvkm_rd32(device, 0x001218); u32 size = (nvkm_rd32(device, 0x10020c) & 0xff000000); - u32 tags = nvkm_rd32(device, 0x100320); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -40,7 +39,7 @@ nv20_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) case 0x00000300: type = NVKM_RAM_TYPE_GDDR2; break; } - ret = nvkm_ram_new_(&nv04_ram_func, fb, type, size, tags, pram); + ret = nvkm_ram_new_(&nv04_ram_func, fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c index 70c63535d56b..2b12e388f47a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c @@ -187,13 +187,13 @@ nv40_ram_func = { int nv40_ram_new_(struct nvkm_fb *fb, enum nvkm_ram_type type, u64 size, - u32 tags, struct nvkm_ram **pram) + struct nvkm_ram **pram) { struct nv40_ram *ram; if (!(ram = kzalloc(sizeof(*ram), GFP_KERNEL))) return -ENOMEM; *pram = &ram->base; - return nvkm_ram_ctor(&nv40_ram_func, fb, type, size, tags, &ram->base); + return nvkm_ram_ctor(&nv40_ram_func, fb, type, size, &ram->base); } int @@ -202,7 +202,6 @@ nv40_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) struct nvkm_device *device = fb->subdev.device; u32 pbus1218 = nvkm_rd32(device, 0x001218); u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; - u32 tags = nvkm_rd32(device, 0x100320); enum 
nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -213,7 +212,7 @@ nv40_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) case 0x00000300: type = NVKM_RAM_TYPE_DDR2 ; break; } - ret = nv40_ram_new_(fb, type, size, tags, pram); + ret = nv40_ram_new_(fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h index 8a0524566b48..11f6bb2936b9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NV40_FB_RAM_H__ #define __NV40_FB_RAM_H__ #define nv40_ram(p) container_of((p), struct nv40_ram, base) @@ -9,6 +10,6 @@ struct nv40_ram { u32 coef; }; -int nv40_ram_new_(struct nvkm_fb *fb, enum nvkm_ram_type, u64, u32, +int nv40_ram_new_(struct nvkm_fb *fb, enum nvkm_ram_type, u64, struct nvkm_ram **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c index 114828be292e..d3fea3726461 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c @@ -28,7 +28,6 @@ nv41_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { struct nvkm_device *device = fb->subdev.device; u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; - u32 tags = nvkm_rd32(device, 0x100320); u32 fb474 = nvkm_rd32(device, 0x100474); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -40,7 +39,7 @@ nv41_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) if (fb474 & 0x00000001) type = NVKM_RAM_TYPE_DDR1; - ret = nv40_ram_new_(fb, type, size, tags, pram); + ret = nv40_ram_new_(fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c index bc56fbf1c788..ab2630e5e6fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c @@ -38,5 +38,5 @@ nv44_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) if (fb474 & 0x00000001) type = NVKM_RAM_TYPE_DDR1; - return nv40_ram_new_(fb, type, size, 0, pram); + return nv40_ram_new_(fb, type, size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c index c01f4b1022b8..946ca7c2e0b6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c @@ -28,7 +28,6 @@ nv49_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { struct nvkm_device *device = fb->subdev.device; u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; - u32 tags = nvkm_rd32(device, 0x100320); u32 fb914 = nvkm_rd32(device, 0x100914); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -40,7 +39,7 @@ nv49_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) case 0x00000003: break; } - ret = nv40_ram_new_(fb, type, size, tags, pram); + ret = nv40_ram_new_(fb, type, size, pram); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c index fa3c2e06203d..02b8bdbc819f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c @@ -29,5 +29,5 @@ nv4e_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) struct nvkm_device *device = fb->subdev.device; u32 size = nvkm_rd32(device, 0x10020c) & 0xff000000; return nvkm_ram_new_(&nv04_ram_func, fb, 
NVKM_RAM_TYPE_UNKNOWN, - size, 0, pram); + size, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c index 6549b0588309..2ccb4b6be153 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c @@ -493,100 +493,8 @@ nv50_ram_tidy(struct nvkm_ram *base) ram_exec(&ram->hwsq, false); } -void -__nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem *mem) -{ - struct nvkm_mm_node *next = mem->mem; - struct nvkm_mm_node *node; - while ((node = next)) { - next = node->next; - nvkm_mm_free(&ram->vram, &node); - } - nvkm_mm_free(&ram->tags, &mem->tag); -} - -void -nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem **pmem) -{ - struct nvkm_mem *mem = *pmem; - - *pmem = NULL; - if (unlikely(mem == NULL)) - return; - - mutex_lock(&ram->fb->subdev.mutex); - __nv50_ram_put(ram, mem); - mutex_unlock(&ram->fb->subdev.mutex); - - kfree(mem); -} - -int -nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, - u32 memtype, struct nvkm_mem **pmem) -{ - struct nvkm_mm *heap = &ram->vram; - struct nvkm_mm *tags = &ram->tags; - struct nvkm_mm_node **node, *r; - struct nvkm_mem *mem; - int comp = (memtype & 0x300) >> 8; - int type = (memtype & 0x07f); - int back = (memtype & 0x800); - int min, max, ret; - - max = (size >> NVKM_RAM_MM_SHIFT); - min = ncmin ? (ncmin >> NVKM_RAM_MM_SHIFT) : max; - align >>= NVKM_RAM_MM_SHIFT; - - mem = kzalloc(sizeof(*mem), GFP_KERNEL); - if (!mem) - return -ENOMEM; - - mutex_lock(&ram->fb->subdev.mutex); - if (comp) { - if (align == (1 << (16 - NVKM_RAM_MM_SHIFT))) { - int n = (max >> 4) * comp; - - ret = nvkm_mm_head(tags, 0, 1, n, n, 1, &mem->tag); - if (ret) - mem->tag = NULL; - } - - if (unlikely(!mem->tag)) - comp = 0; - } - - mem->memtype = (comp << 7) | type; - mem->size = max; - - type = nv50_fb_memtype[type]; - node = &mem->mem; - do { - if (back) - ret = nvkm_mm_tail(heap, 0, type, max, min, align, &r); - else - ret = nvkm_mm_head(heap, 0, type, max, min, align, &r); - if (ret) { - mutex_unlock(&ram->fb->subdev.mutex); - ram->func->put(ram, &mem); - return ret; - } - - *node = r; - node = &r->next; - max -= r->length; - } while (max); - mutex_unlock(&ram->fb->subdev.mutex); - - mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT; - *pmem = mem; - return 0; -} - static const struct nvkm_ram_func nv50_ram_func = { - .get = nv50_ram_get, - .put = nv50_ram_put, .calc = nv50_ram_calc, .prog = nv50_ram_prog, .tidy = nv50_ram_tidy, @@ -639,7 +547,6 @@ nv50_ram_ctor(const struct nvkm_ram_func *func, const u32 rsvd_head = ( 256 * 1024); /* vga memory */ const u32 rsvd_tail = (1024 * 1024); /* vbios etc */ u64 size = nvkm_rd32(device, 0x10020c); - u32 tags = nvkm_rd32(device, 0x100320); enum nvkm_ram_type type = NVKM_RAM_TYPE_UNKNOWN; int ret; @@ -660,7 +567,7 @@ nv50_ram_ctor(const struct nvkm_ram_func *func, size = (size & 0x000000ff) << 32 | (size & 0xffffff00); - ret = nvkm_ram_ctor(func, fb, type, size, tags, ram); + ret = nvkm_ram_ctor(func, fb, type, size, ram); if (ret) return ret; @@ -669,7 +576,8 @@ nv50_ram_ctor(const struct nvkm_ram_func *func, ram->ranks = (nvkm_rd32(device, 0x100200) & 0x4) ? 
2 : 1; nvkm_mm_fini(&ram->vram); - return nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + return nvkm_mm_init(&ram->vram, NVKM_RAM_MM_NORMAL, + rsvd_head >> NVKM_RAM_MM_SHIFT, (size - rsvd_head - rsvd_tail) >> NVKM_RAM_MM_SHIFT, nv50_fb_vram_rblock(ram) >> NVKM_RAM_MM_SHIFT); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h index 8df7306d5729..d8f5053e8e2a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FBRAM_SEQ_H__ #define __NVKM_FBRAM_SEQ_H__ #include <subdev/bus/hwsq.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/regsnv04.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/regsnv04.h index 1f865f61504e..ad26fcbe9e06 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/regsnv04.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/regsnv04.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FB_REGS_04_H__ #define __NVKM_FB_REGS_04_H__ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/priv.h index b0390b540ef5..3a5595a9e457 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fuse/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_FUSE_PRIV_H__ #define __NVKM_FUSE_PRIV_H__ #define nvkm_fuse(p) container_of((p), struct nvkm_fuse, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/priv.h index 371bcdbbe0d6..9759f13447bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_GPIO_PRIV_H__ #define __NVKM_GPIO_PRIV_H__ #define nvkm_gpio(p) container_of((p), struct nvkm_gpio, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h index 9587ab456d9e..7d56c4ba693c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_I2C_AUX_H__ #define __NVKM_I2C_AUX_H__ #include "pad.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h index e1be14c23e54..bea0dd33961e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_I2C_BUS_H__ #define __NVKM_I2C_BUS_H__ #include "pad.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h index 316c4536f29a..33f0c809e583 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_I2C_PAD_H__ #define __NVKM_I2C_PAD_H__ #include <subdev/i2c.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h index bf655a66ef40..f476a69b6cb7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_I2C_PRIV_H__ #define __NVKM_I2C_PRIV_H__ #define nvkm_i2c(p) container_of((p), struct nvkm_i2c, subdev) diff --git 
a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h index 01caf798cf31..504a6d37ec50 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_IBUS_PRIV_H__ #define __NVKM_IBUS_PRIV_H__ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h index e90e0f6ed008..bd599b8252ca 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_ICCSENSE_PRIV_H__ #define __NVKM_ICCSENSE_PRIV_H__ #define nvkm_iccsense(p) container_of((p), struct nvkm_iccsense, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c index 10c987a654ec..364ea4492acc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c @@ -23,181 +23,90 @@ */ #include "priv.h" -#include <core/memory.h> #include <subdev/bar.h> /****************************************************************************** * instmem object base implementation *****************************************************************************/ -#define nvkm_instobj(p) container_of((p), struct nvkm_instobj, memory) - -struct nvkm_instobj { - struct nvkm_memory memory; - struct nvkm_memory *parent; - struct nvkm_instmem *imem; - struct list_head head; - u32 *suspend; - void __iomem *map; -}; - -static enum nvkm_memory_target -nvkm_instobj_target(struct nvkm_memory *memory) -{ - memory = nvkm_instobj(memory)->parent; - return nvkm_memory_target(memory); -} - -static u64 -nvkm_instobj_addr(struct nvkm_memory *memory) -{ - memory = nvkm_instobj(memory)->parent; - return nvkm_memory_addr(memory); -} - -static u64 -nvkm_instobj_size(struct nvkm_memory *memory) -{ - memory = nvkm_instobj(memory)->parent; - return nvkm_memory_size(memory); -} - static void -nvkm_instobj_release(struct nvkm_memory *memory) +nvkm_instobj_load(struct nvkm_instobj *iobj) { - struct nvkm_instobj *iobj = nvkm_instobj(memory); - nvkm_bar_flush(iobj->imem->subdev.device->bar); -} - -static void __iomem * -nvkm_instobj_acquire(struct nvkm_memory *memory) -{ - return nvkm_instobj(memory)->map; -} - -static u32 -nvkm_instobj_rd32(struct nvkm_memory *memory, u64 offset) -{ - return ioread32_native(nvkm_instobj(memory)->map + offset); -} - -static void -nvkm_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) -{ - iowrite32_native(data, nvkm_instobj(memory)->map + offset); -} + struct nvkm_memory *memory = &iobj->memory; + const u64 size = nvkm_memory_size(memory); + void __iomem *map; + int i; -static void -nvkm_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) -{ - memory = nvkm_instobj(memory)->parent; - nvkm_memory_map(memory, vma, offset); -} + if (!(map = nvkm_kmap(memory))) { + for (i = 0; i < size; i += 4) + nvkm_wo32(memory, i, iobj->suspend[i / 4]); + } else { + memcpy_toio(map, iobj->suspend, size); + } + nvkm_done(memory); -static void * -nvkm_instobj_dtor(struct nvkm_memory *memory) -{ - struct nvkm_instobj *iobj = nvkm_instobj(memory); - spin_lock(&iobj->imem->lock); - list_del(&iobj->head); - spin_unlock(&iobj->imem->lock); - nvkm_memory_del(&iobj->parent); - return iobj; + kvfree(iobj->suspend); + iobj->suspend = NULL; } -static const struct 
nvkm_memory_func -nvkm_instobj_func = { - .dtor = nvkm_instobj_dtor, - .target = nvkm_instobj_target, - .addr = nvkm_instobj_addr, - .size = nvkm_instobj_size, - .acquire = nvkm_instobj_acquire, - .release = nvkm_instobj_release, - .rd32 = nvkm_instobj_rd32, - .wr32 = nvkm_instobj_wr32, - .map = nvkm_instobj_map, -}; - -static void -nvkm_instobj_boot(struct nvkm_memory *memory, struct nvkm_vm *vm) +static int +nvkm_instobj_save(struct nvkm_instobj *iobj) { - memory = nvkm_instobj(memory)->parent; - nvkm_memory_boot(memory, vm); -} + struct nvkm_memory *memory = &iobj->memory; + const u64 size = nvkm_memory_size(memory); + void __iomem *map; + int i; -static void -nvkm_instobj_release_slow(struct nvkm_memory *memory) -{ - struct nvkm_instobj *iobj = nvkm_instobj(memory); - nvkm_instobj_release(memory); - nvkm_done(iobj->parent); -} + iobj->suspend = kvmalloc(size, GFP_KERNEL); + if (!iobj->suspend) + return -ENOMEM; -static void __iomem * -nvkm_instobj_acquire_slow(struct nvkm_memory *memory) -{ - struct nvkm_instobj *iobj = nvkm_instobj(memory); - iobj->map = nvkm_kmap(iobj->parent); - if (iobj->map) - memory->func = &nvkm_instobj_func; - return iobj->map; + if (!(map = nvkm_kmap(memory))) { + for (i = 0; i < size; i += 4) + iobj->suspend[i / 4] = nvkm_ro32(memory, i); + } else { + memcpy_fromio(iobj->suspend, map, size); + } + nvkm_done(memory); + return 0; } -static u32 -nvkm_instobj_rd32_slow(struct nvkm_memory *memory, u64 offset) +void +nvkm_instobj_dtor(struct nvkm_instmem *imem, struct nvkm_instobj *iobj) { - struct nvkm_instobj *iobj = nvkm_instobj(memory); - return nvkm_ro32(iobj->parent, offset); + spin_lock(&imem->lock); + list_del(&iobj->head); + spin_unlock(&imem->lock); } -static void -nvkm_instobj_wr32_slow(struct nvkm_memory *memory, u64 offset, u32 data) +void +nvkm_instobj_ctor(const struct nvkm_memory_func *func, + struct nvkm_instmem *imem, struct nvkm_instobj *iobj) { - struct nvkm_instobj *iobj = nvkm_instobj(memory); - return nvkm_wo32(iobj->parent, offset, data); + nvkm_memory_ctor(func, &iobj->memory); + iobj->suspend = NULL; + spin_lock(&imem->lock); + list_add_tail(&iobj->head, &imem->list); + spin_unlock(&imem->lock); } -static const struct nvkm_memory_func -nvkm_instobj_func_slow = { - .dtor = nvkm_instobj_dtor, - .target = nvkm_instobj_target, - .addr = nvkm_instobj_addr, - .size = nvkm_instobj_size, - .boot = nvkm_instobj_boot, - .acquire = nvkm_instobj_acquire_slow, - .release = nvkm_instobj_release_slow, - .rd32 = nvkm_instobj_rd32_slow, - .wr32 = nvkm_instobj_wr32_slow, - .map = nvkm_instobj_map, -}; - int nvkm_instobj_new(struct nvkm_instmem *imem, u32 size, u32 align, bool zero, struct nvkm_memory **pmemory) { + struct nvkm_subdev *subdev = &imem->subdev; struct nvkm_memory *memory = NULL; - struct nvkm_instobj *iobj; u32 offset; int ret; ret = imem->func->memory_new(imem, size, align, zero, &memory); - if (ret) + if (ret) { + nvkm_error(subdev, "OOM: %08x %08x %d\n", size, align, ret); goto done; - - if (!imem->func->persistent) { - if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) { - ret = -ENOMEM; - goto done; - } - - nvkm_memory_ctor(&nvkm_instobj_func_slow, &iobj->memory); - iobj->parent = memory; - iobj->imem = imem; - spin_lock(&iobj->imem->lock); - list_add_tail(&iobj->head, &imem->list); - spin_unlock(&iobj->imem->lock); - memory = &iobj->memory; } + nvkm_trace(subdev, "new %08x %08x %d: %010llx %010llx\n", size, align, + zero, nvkm_memory_addr(memory), nvkm_memory_size(memory)); + if (!imem->func->zero && zero) { void __iomem *map = 
nvkm_kmap(memory); if (unlikely(!map)) { @@ -211,7 +120,7 @@ nvkm_instobj_new(struct nvkm_instmem *imem, u32 size, u32 align, bool zero, done: if (ret) - nvkm_memory_del(&memory); + nvkm_memory_unref(&memory); *pmemory = memory; return ret; } @@ -232,39 +141,46 @@ nvkm_instmem_wr32(struct nvkm_instmem *imem, u32 addr, u32 data) return imem->func->wr32(imem, addr, data); } +void +nvkm_instmem_boot(struct nvkm_instmem *imem) +{ + /* Separate bootstrapped objects from normal list, as we need + * to make sure they're accessed with the slowpath on suspend + * and resume. + */ + struct nvkm_instobj *iobj, *itmp; + spin_lock(&imem->lock); + list_for_each_entry_safe(iobj, itmp, &imem->list, head) { + list_move_tail(&iobj->head, &imem->boot); + } + spin_unlock(&imem->lock); +} + static int nvkm_instmem_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_instmem *imem = nvkm_instmem(subdev); struct nvkm_instobj *iobj; - int i; - - if (imem->func->fini) - imem->func->fini(imem); if (suspend) { list_for_each_entry(iobj, &imem->list, head) { - struct nvkm_memory *memory = iobj->parent; - u64 size = nvkm_memory_size(memory); + int ret = nvkm_instobj_save(iobj); + if (ret) + return ret; + } - iobj->suspend = vmalloc(size); - if (!iobj->suspend) - return -ENOMEM; + nvkm_bar_bar2_fini(subdev->device); - for (i = 0; i < size; i += 4) - iobj->suspend[i / 4] = nvkm_ro32(memory, i); + list_for_each_entry(iobj, &imem->boot, head) { + int ret = nvkm_instobj_save(iobj); + if (ret) + return ret; } } - return 0; -} + if (imem->func->fini) + imem->func->fini(imem); -static int -nvkm_instmem_oneinit(struct nvkm_subdev *subdev) -{ - struct nvkm_instmem *imem = nvkm_instmem(subdev); - if (imem->func->oneinit) - return imem->func->oneinit(imem); return 0; } @@ -273,22 +189,31 @@ nvkm_instmem_init(struct nvkm_subdev *subdev) { struct nvkm_instmem *imem = nvkm_instmem(subdev); struct nvkm_instobj *iobj; - int i; + + list_for_each_entry(iobj, &imem->boot, head) { + if (iobj->suspend) + nvkm_instobj_load(iobj); + } + + nvkm_bar_bar2_init(subdev->device); list_for_each_entry(iobj, &imem->list, head) { - if (iobj->suspend) { - struct nvkm_memory *memory = iobj->parent; - u64 size = nvkm_memory_size(memory); - for (i = 0; i < size; i += 4) - nvkm_wo32(memory, i, iobj->suspend[i / 4]); - vfree(iobj->suspend); - iobj->suspend = NULL; - } + if (iobj->suspend) + nvkm_instobj_load(iobj); } return 0; } +static int +nvkm_instmem_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_instmem *imem = nvkm_instmem(subdev); + if (imem->func->oneinit) + return imem->func->oneinit(imem); + return 0; +} + static void * nvkm_instmem_dtor(struct nvkm_subdev *subdev) { @@ -315,4 +240,5 @@ nvkm_instmem_ctor(const struct nvkm_instmem_func *func, imem->func = func; spin_lock_init(&imem->lock); INIT_LIST_HEAD(&imem->list); + INIT_LIST_HEAD(&imem->boot); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index cd5adbec5e57..985f2990ab0d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -44,14 +44,13 @@ #include "priv.h" #include <core/memory.h> -#include <core/mm.h> #include <core/tegra.h> -#include <subdev/fb.h> #include <subdev/ltc.h> +#include <subdev/mmu.h> struct gk20a_instobj { struct nvkm_memory memory; - struct nvkm_mem mem; + struct nvkm_mm_node *mn; struct gk20a_instmem *imem; /* CPU mapping */ @@ -119,16 +118,22 @@ gk20a_instobj_target(struct nvkm_memory *memory) return 
NVKM_MEM_TARGET_NCOH; } +static u8 +gk20a_instobj_page(struct nvkm_memory *memory) +{ + return 12; +} + static u64 gk20a_instobj_addr(struct nvkm_memory *memory) { - return gk20a_instobj(memory)->mem.offset; + return (u64)gk20a_instobj(memory)->mn->offset << 12; } static u64 gk20a_instobj_size(struct nvkm_memory *memory) { - return (u64)gk20a_instobj(memory)->mem.size << 12; + return (u64)gk20a_instobj(memory)->mn->length << 12; } /* @@ -272,12 +277,18 @@ gk20a_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) node->vaddr[offset / 4] = data; } -static void -gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) +static int +gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) { struct gk20a_instobj *node = gk20a_instobj(memory); + struct nvkm_vmm_map map = { + .memory = &node->memory, + .offset = offset, + .mem = node->mn, + }; - nvkm_vm_map_at(vma, offset, &node->mem); + return nvkm_vmm_map(vmm, vma, argv, argc, &map); } static void * @@ -290,8 +301,8 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory) if (unlikely(!node->base.vaddr)) goto out; - dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr, - node->handle, imem->attrs); + dma_free_attrs(dev, (u64)node->base.mn->length << PAGE_SHIFT, + node->base.vaddr, node->handle, imem->attrs); out: return node; @@ -303,7 +314,7 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); struct gk20a_instmem *imem = node->base.imem; struct device *dev = imem->base.subdev.device->dev; - struct nvkm_mm_node *r = node->base.mem.mem; + struct nvkm_mm_node *r = node->base.mn; int i; if (unlikely(!r)) @@ -321,7 +332,7 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift); /* Unmap pages from GPU address space and free them */ - for (i = 0; i < node->base.mem.size; i++) { + for (i = 0; i < node->base.mn->length; i++) { iommu_unmap(imem->domain, (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE); dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE, @@ -342,12 +353,11 @@ static const struct nvkm_memory_func gk20a_instobj_func_dma = { .dtor = gk20a_instobj_dtor_dma, .target = gk20a_instobj_target, + .page = gk20a_instobj_page, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, .acquire = gk20a_instobj_acquire_dma, .release = gk20a_instobj_release_dma, - .rd32 = gk20a_instobj_rd32, - .wr32 = gk20a_instobj_wr32, .map = gk20a_instobj_map, }; @@ -355,13 +365,18 @@ static const struct nvkm_memory_func gk20a_instobj_func_iommu = { .dtor = gk20a_instobj_dtor_iommu, .target = gk20a_instobj_target, + .page = gk20a_instobj_page, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, .acquire = gk20a_instobj_acquire_iommu, .release = gk20a_instobj_release_iommu, + .map = gk20a_instobj_map, +}; + +static const struct nvkm_memory_ptrs +gk20a_instobj_ptrs = { .rd32 = gk20a_instobj_rd32, .wr32 = gk20a_instobj_wr32, - .map = gk20a_instobj_map, }; static int @@ -377,6 +392,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, *_node = &node->base; nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); + node->base.memory.ptrs = &gk20a_instobj_ptrs; node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, @@ -397,8 +413,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, node->r.offset = node->handle >> 12; node->r.length = (npages 
<< PAGE_SHIFT) >> 12; - node->base.mem.offset = node->handle; - node->base.mem.mem = &node->r; + node->base.mn = &node->r; return 0; } @@ -424,6 +439,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, node->dma_addrs = (void *)(node->pages + npages); nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); + node->base.memory.ptrs = &gk20a_instobj_ptrs; /* Allocate backing memory */ for (i = 0; i < npages; i++) { @@ -474,8 +490,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, /* IOMMU bit tells that an address is to be resolved through the IOMMU */ r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift); - node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift; - node->base.mem.mem = r; + node->base.mn = r; return 0; release_area: @@ -523,13 +538,8 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, node->imem = imem; - /* present memory for being mapped using small pages */ - node->mem.size = size >> 12; - node->mem.memtype = 0; - node->mem.page_shift = 12; - nvkm_debug(subdev, "alloc size: 0x%x, align: 0x%x, gaddr: 0x%llx\n", - size, align, node->mem.offset); + size, align, (u64)node->mn->offset << 12); return 0; } @@ -554,7 +564,6 @@ static const struct nvkm_instmem_func gk20a_instmem = { .dtor = gk20a_instmem_dtor, .memory_new = gk20a_instobj_new, - .persistent = true, .zero = false, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c index 6133c8bb2d42..6bf0dad46919 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c @@ -24,7 +24,6 @@ #define nv04_instmem(p) container_of((p), struct nv04_instmem, base) #include "priv.h" -#include <core/memory.h> #include <core/ramht.h> struct nv04_instmem { @@ -35,30 +34,39 @@ struct nv04_instmem { /****************************************************************************** * instmem object implementation *****************************************************************************/ -#define nv04_instobj(p) container_of((p), struct nv04_instobj, memory) +#define nv04_instobj(p) container_of((p), struct nv04_instobj, base.memory) struct nv04_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nv04_instmem *imem; struct nvkm_mm_node *node; }; -static enum nvkm_memory_target -nv04_instobj_target(struct nvkm_memory *memory) +static void +nv04_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) { - return NVKM_MEM_TARGET_INST; + struct nv04_instobj *iobj = nv04_instobj(memory); + struct nvkm_device *device = iobj->imem->base.subdev.device; + nvkm_wr32(device, 0x700000 + iobj->node->offset + offset, data); } -static u64 -nv04_instobj_addr(struct nvkm_memory *memory) +static u32 +nv04_instobj_rd32(struct nvkm_memory *memory, u64 offset) { - return nv04_instobj(memory)->node->offset; + struct nv04_instobj *iobj = nv04_instobj(memory); + struct nvkm_device *device = iobj->imem->base.subdev.device; + return nvkm_rd32(device, 0x700000 + iobj->node->offset + offset); } -static u64 -nv04_instobj_size(struct nvkm_memory *memory) +static const struct nvkm_memory_ptrs +nv04_instobj_ptrs = { + .rd32 = nv04_instobj_rd32, + .wr32 = nv04_instobj_wr32, +}; + +static void +nv04_instobj_release(struct nvkm_memory *memory) { - return nv04_instobj(memory)->node->length; } static void __iomem * @@ -69,25 +77,22 @@ nv04_instobj_acquire(struct nvkm_memory *memory) return device->pri + 0x700000 + 
iobj->node->offset; } -static void -nv04_instobj_release(struct nvkm_memory *memory) +static u64 +nv04_instobj_size(struct nvkm_memory *memory) { + return nv04_instobj(memory)->node->length; } -static u32 -nv04_instobj_rd32(struct nvkm_memory *memory, u64 offset) +static u64 +nv04_instobj_addr(struct nvkm_memory *memory) { - struct nv04_instobj *iobj = nv04_instobj(memory); - struct nvkm_device *device = iobj->imem->base.subdev.device; - return nvkm_rd32(device, 0x700000 + iobj->node->offset + offset); + return nv04_instobj(memory)->node->offset; } -static void -nv04_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) +static enum nvkm_memory_target +nv04_instobj_target(struct nvkm_memory *memory) { - struct nv04_instobj *iobj = nv04_instobj(memory); - struct nvkm_device *device = iobj->imem->base.subdev.device; - nvkm_wr32(device, 0x700000 + iobj->node->offset + offset, data); + return NVKM_MEM_TARGET_INST; } static void * @@ -97,6 +102,7 @@ nv04_instobj_dtor(struct nvkm_memory *memory) mutex_lock(&iobj->imem->base.subdev.mutex); nvkm_mm_free(&iobj->imem->heap, &iobj->node); mutex_unlock(&iobj->imem->base.subdev.mutex); + nvkm_instobj_dtor(&iobj->imem->base, &iobj->base); return iobj; } @@ -108,8 +114,6 @@ nv04_instobj_func = { .addr = nv04_instobj_addr, .acquire = nv04_instobj_acquire, .release = nv04_instobj_release, - .rd32 = nv04_instobj_rd32, - .wr32 = nv04_instobj_wr32, }; static int @@ -122,9 +126,10 @@ nv04_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) return -ENOMEM; - *pmemory = &iobj->memory; + *pmemory = &iobj->base.memory; - nvkm_memory_ctor(&nv04_instobj_func, &iobj->memory); + nvkm_instobj_ctor(&nv04_instobj_func, &imem->base, &iobj->base); + iobj->base.memory.ptrs = &nv04_instobj_ptrs; iobj->imem = imem; mutex_lock(&imem->base.subdev.mutex); @@ -160,7 +165,7 @@ nv04_instmem_oneinit(struct nvkm_instmem *base) /* PRAMIN aperture maps over the end of VRAM, reserve it */ imem->base.reserved = 512 * 1024; - ret = nvkm_mm_init(&imem->heap, 0, imem->base.reserved, 1); + ret = nvkm_mm_init(&imem->heap, 0, 0, imem->base.reserved, 1); if (ret) return ret; @@ -194,10 +199,10 @@ static void * nv04_instmem_dtor(struct nvkm_instmem *base) { struct nv04_instmem *imem = nv04_instmem(base); - nvkm_memory_del(&imem->base.ramfc); - nvkm_memory_del(&imem->base.ramro); + nvkm_memory_unref(&imem->base.ramfc); + nvkm_memory_unref(&imem->base.ramro); nvkm_ramht_del(&imem->base.ramht); - nvkm_memory_del(&imem->base.vbios); + nvkm_memory_unref(&imem->base.vbios); nvkm_mm_fini(&imem->heap); return imem; } @@ -209,7 +214,6 @@ nv04_instmem = { .rd32 = nv04_instmem_rd32, .wr32 = nv04_instmem_wr32, .memory_new = nv04_instobj_new, - .persistent = false, .zero = false, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c index c0543875e490..086c118488ef 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c @@ -24,7 +24,6 @@ #define nv40_instmem(p) container_of((p), struct nv40_instmem, base) #include "priv.h" -#include <core/memory.h> #include <core/ramht.h> #include <engine/gr/nv40.h> @@ -37,30 +36,38 @@ struct nv40_instmem { /****************************************************************************** * instmem object implementation *****************************************************************************/ -#define nv40_instobj(p) container_of((p), struct nv40_instobj, memory) 
+#define nv40_instobj(p) container_of((p), struct nv40_instobj, base.memory) struct nv40_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nv40_instmem *imem; struct nvkm_mm_node *node; }; -static enum nvkm_memory_target -nv40_instobj_target(struct nvkm_memory *memory) +static void +nv40_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) { - return NVKM_MEM_TARGET_INST; + struct nv40_instobj *iobj = nv40_instobj(memory); + iowrite32_native(data, iobj->imem->iomem + iobj->node->offset + offset); } -static u64 -nv40_instobj_addr(struct nvkm_memory *memory) +static u32 +nv40_instobj_rd32(struct nvkm_memory *memory, u64 offset) { - return nv40_instobj(memory)->node->offset; + struct nv40_instobj *iobj = nv40_instobj(memory); + return ioread32_native(iobj->imem->iomem + iobj->node->offset + offset); } -static u64 -nv40_instobj_size(struct nvkm_memory *memory) +static const struct nvkm_memory_ptrs +nv40_instobj_ptrs = { + .rd32 = nv40_instobj_rd32, + .wr32 = nv40_instobj_wr32, +}; + +static void +nv40_instobj_release(struct nvkm_memory *memory) { - return nv40_instobj(memory)->node->length; + wmb(); } static void __iomem * @@ -70,23 +77,22 @@ nv40_instobj_acquire(struct nvkm_memory *memory) return iobj->imem->iomem + iobj->node->offset; } -static void -nv40_instobj_release(struct nvkm_memory *memory) +static u64 +nv40_instobj_size(struct nvkm_memory *memory) { + return nv40_instobj(memory)->node->length; } -static u32 -nv40_instobj_rd32(struct nvkm_memory *memory, u64 offset) +static u64 +nv40_instobj_addr(struct nvkm_memory *memory) { - struct nv40_instobj *iobj = nv40_instobj(memory); - return ioread32_native(iobj->imem->iomem + iobj->node->offset + offset); + return nv40_instobj(memory)->node->offset; } -static void -nv40_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) +static enum nvkm_memory_target +nv40_instobj_target(struct nvkm_memory *memory) { - struct nv40_instobj *iobj = nv40_instobj(memory); - iowrite32_native(data, iobj->imem->iomem + iobj->node->offset + offset); + return NVKM_MEM_TARGET_INST; } static void * @@ -96,6 +102,7 @@ nv40_instobj_dtor(struct nvkm_memory *memory) mutex_lock(&iobj->imem->base.subdev.mutex); nvkm_mm_free(&iobj->imem->heap, &iobj->node); mutex_unlock(&iobj->imem->base.subdev.mutex); + nvkm_instobj_dtor(&iobj->imem->base, &iobj->base); return iobj; } @@ -107,8 +114,6 @@ nv40_instobj_func = { .addr = nv40_instobj_addr, .acquire = nv40_instobj_acquire, .release = nv40_instobj_release, - .rd32 = nv40_instobj_rd32, - .wr32 = nv40_instobj_wr32, }; static int @@ -121,9 +126,10 @@ nv40_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) return -ENOMEM; - *pmemory = &iobj->memory; + *pmemory = &iobj->base.memory; - nvkm_memory_ctor(&nv40_instobj_func, &iobj->memory); + nvkm_instobj_ctor(&nv40_instobj_func, &imem->base, &iobj->base); + iobj->base.memory.ptrs = &nv40_instobj_ptrs; iobj->imem = imem; mutex_lock(&imem->base.subdev.mutex); @@ -171,7 +177,7 @@ nv40_instmem_oneinit(struct nvkm_instmem *base) imem->base.reserved += 512 * 1024; /* object storage */ imem->base.reserved = round_up(imem->base.reserved, 4096); - ret = nvkm_mm_init(&imem->heap, 0, imem->base.reserved, 1); + ret = nvkm_mm_init(&imem->heap, 0, 0, imem->base.reserved, 1); if (ret) return ret; @@ -209,10 +215,10 @@ static void * nv40_instmem_dtor(struct nvkm_instmem *base) { struct nv40_instmem *imem = nv40_instmem(base); - nvkm_memory_del(&imem->base.ramfc); - 
nvkm_memory_del(&imem->base.ramro); + nvkm_memory_unref(&imem->base.ramfc); + nvkm_memory_unref(&imem->base.ramro); nvkm_ramht_del(&imem->base.ramht); - nvkm_memory_del(&imem->base.vbios); + nvkm_memory_unref(&imem->base.vbios); nvkm_mm_fini(&imem->heap); if (imem->iomem) iounmap(imem->iomem); @@ -226,7 +232,6 @@ nv40_instmem = { .rd32 = nv40_instmem_rd32, .wr32 = nv40_instmem_wr32, .memory_new = nv40_instobj_new, - .persistent = false, .zero = false, }; @@ -248,8 +253,8 @@ nv40_instmem_new(struct nvkm_device *device, int index, else bar = 3; - imem->iomem = ioremap(device->func->resource_addr(device, bar), - device->func->resource_size(device, bar)); + imem->iomem = ioremap_wc(device->func->resource_addr(device, bar), + device->func->resource_size(device, bar)); if (!imem->iomem) { nvkm_error(&imem->base.subdev, "unable to map PRAMIN BAR\n"); return -EFAULT; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index 6d512c062ae3..1ba7289684aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -31,147 +31,293 @@ struct nv50_instmem { struct nvkm_instmem base; - unsigned long lock_flags; - spinlock_t lock; u64 addr; + + /* Mappings that can be evicted when BAR2 space has been exhausted. */ + struct list_head lru; }; /****************************************************************************** * instmem object implementation *****************************************************************************/ -#define nv50_instobj(p) container_of((p), struct nv50_instobj, memory) +#define nv50_instobj(p) container_of((p), struct nv50_instobj, base.memory) struct nv50_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nv50_instmem *imem; - struct nvkm_mem *mem; - struct nvkm_vma bar; + struct nvkm_memory *ram; + struct nvkm_vma *bar; + refcount_t maps; void *map; + struct list_head lru; }; -static enum nvkm_memory_target -nv50_instobj_target(struct nvkm_memory *memory) +static void +nv50_instobj_wr32_slow(struct nvkm_memory *memory, u64 offset, u32 data) { - return NVKM_MEM_TARGET_VRAM; + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_device *device = imem->base.subdev.device; + u64 base = (nvkm_memory_addr(iobj->ram) + offset) & 0xffffff00000ULL; + u64 addr = (nvkm_memory_addr(iobj->ram) + offset) & 0x000000fffffULL; + unsigned long flags; + + spin_lock_irqsave(&imem->base.lock, flags); + if (unlikely(imem->addr != base)) { + nvkm_wr32(device, 0x001700, base >> 16); + imem->addr = base; + } + nvkm_wr32(device, 0x700000 + addr, data); + spin_unlock_irqrestore(&imem->base.lock, flags); } -static u64 -nv50_instobj_addr(struct nvkm_memory *memory) +static u32 +nv50_instobj_rd32_slow(struct nvkm_memory *memory, u64 offset) { - return nv50_instobj(memory)->mem->offset; + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_device *device = imem->base.subdev.device; + u64 base = (nvkm_memory_addr(iobj->ram) + offset) & 0xffffff00000ULL; + u64 addr = (nvkm_memory_addr(iobj->ram) + offset) & 0x000000fffffULL; + u32 data; + unsigned long flags; + + spin_lock_irqsave(&imem->base.lock, flags); + if (unlikely(imem->addr != base)) { + nvkm_wr32(device, 0x001700, base >> 16); + imem->addr = base; + } + data = nvkm_rd32(device, 0x700000 + addr); + spin_unlock_irqrestore(&imem->base.lock, flags); + return data; } -static u64 
-nv50_instobj_size(struct nvkm_memory *memory) +static const struct nvkm_memory_ptrs +nv50_instobj_slow = { + .rd32 = nv50_instobj_rd32_slow, + .wr32 = nv50_instobj_wr32_slow, +}; + +static void +nv50_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) { - return (u64)nv50_instobj(memory)->mem->size << NVKM_RAM_MM_SHIFT; + iowrite32_native(data, nv50_instobj(memory)->map + offset); } +static u32 +nv50_instobj_rd32(struct nvkm_memory *memory, u64 offset) +{ + return ioread32_native(nv50_instobj(memory)->map + offset); +} + +static const struct nvkm_memory_ptrs +nv50_instobj_fast = { + .rd32 = nv50_instobj_rd32, + .wr32 = nv50_instobj_wr32, +}; + static void -nv50_instobj_boot(struct nvkm_memory *memory, struct nvkm_vm *vm) +nv50_instobj_kmap(struct nv50_instobj *iobj, struct nvkm_vmm *vmm) { - struct nv50_instobj *iobj = nv50_instobj(memory); - struct nvkm_subdev *subdev = &iobj->imem->base.subdev; + struct nv50_instmem *imem = iobj->imem; + struct nv50_instobj *eobj; + struct nvkm_memory *memory = &iobj->base.memory; + struct nvkm_subdev *subdev = &imem->base.subdev; struct nvkm_device *device = subdev->device; + struct nvkm_vma *bar = NULL, *ebar; u64 size = nvkm_memory_size(memory); - void __iomem *map; + void *emap; int ret; - iobj->map = ERR_PTR(-ENOMEM); - - ret = nvkm_vm_get(vm, size, 12, NV_MEM_ACCESS_RW, &iobj->bar); - if (ret == 0) { - map = ioremap(device->func->resource_addr(device, 3) + - (u32)iobj->bar.offset, size); - if (map) { - nvkm_memory_map(memory, &iobj->bar, 0); - iobj->map = map; - } else { - nvkm_warn(subdev, "PRAMIN ioremap failed\n"); - nvkm_vm_put(&iobj->bar); + /* Attempt to allocate BAR2 address-space and map the object + * into it. The lock has to be dropped while doing this due + * to the possibility of recursion for page table allocation. + */ + mutex_unlock(&subdev->mutex); + while ((ret = nvkm_vmm_get(vmm, 12, size, &bar))) { + /* Evict unused mappings, and keep retrying until we either + * succeed,or there's no more objects left on the LRU. + */ + mutex_lock(&subdev->mutex); + eobj = list_first_entry_or_null(&imem->lru, typeof(*eobj), lru); + if (eobj) { + nvkm_debug(subdev, "evict %016llx %016llx @ %016llx\n", + nvkm_memory_addr(&eobj->base.memory), + nvkm_memory_size(&eobj->base.memory), + eobj->bar->addr); + list_del_init(&eobj->lru); + ebar = eobj->bar; + eobj->bar = NULL; + emap = eobj->map; + eobj->map = NULL; } - } else { - nvkm_warn(subdev, "PRAMIN exhausted\n"); + mutex_unlock(&subdev->mutex); + if (!eobj) + break; + iounmap(emap); + nvkm_vmm_put(vmm, &ebar); } + + if (ret == 0) + ret = nvkm_memory_map(memory, 0, vmm, bar, NULL, 0); + mutex_lock(&subdev->mutex); + if (ret || iobj->bar) { + /* We either failed, or another thread beat us. */ + mutex_unlock(&subdev->mutex); + nvkm_vmm_put(vmm, &bar); + mutex_lock(&subdev->mutex); + return; + } + + /* Make the mapping visible to the host. 
*/ + iobj->bar = bar; + iobj->map = ioremap_wc(device->func->resource_addr(device, 3) + + (u32)iobj->bar->addr, size); + if (!iobj->map) { + nvkm_warn(subdev, "PRAMIN ioremap failed\n"); + nvkm_vmm_put(vmm, &iobj->bar); + } +} + +static int +nv50_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + memory = nv50_instobj(memory)->ram; + return nvkm_memory_map(memory, offset, vmm, vma, argv, argc); } static void nv50_instobj_release(struct nvkm_memory *memory) { - struct nv50_instmem *imem = nv50_instobj(memory)->imem; - spin_unlock_irqrestore(&imem->lock, imem->lock_flags); + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_subdev *subdev = &imem->base.subdev; + + wmb(); + nvkm_bar_flush(subdev->device->bar); + + if (refcount_dec_and_mutex_lock(&iobj->maps, &subdev->mutex)) { + /* Add the now-unused mapping to the LRU instead of directly + * unmapping it here, in case we need to map it again later. + */ + if (likely(iobj->lru.next) && iobj->map) { + BUG_ON(!list_empty(&iobj->lru)); + list_add_tail(&iobj->lru, &imem->lru); + } + + /* Switch back to NULL accessors when last map is gone. */ + iobj->base.memory.ptrs = NULL; + mutex_unlock(&subdev->mutex); + } } static void __iomem * nv50_instobj_acquire(struct nvkm_memory *memory) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_bar *bar = imem->base.subdev.device->bar; - struct nvkm_vm *vm; - unsigned long flags; + struct nvkm_instmem *imem = &iobj->imem->base; + struct nvkm_vmm *vmm; + void __iomem *map = NULL; - if (!iobj->map && (vm = nvkm_bar_kmap(bar))) - nvkm_memory_boot(memory, vm); - if (!IS_ERR_OR_NULL(iobj->map)) + /* Already mapped? */ + if (refcount_inc_not_zero(&iobj->maps)) return iobj->map; - spin_lock_irqsave(&imem->lock, flags); - imem->lock_flags = flags; - return NULL; -} + /* Take the lock, and re-check that another thread hasn't + * already mapped the object in the meantime. + */ + mutex_lock(&imem->subdev.mutex); + if (refcount_inc_not_zero(&iobj->maps)) { + mutex_unlock(&imem->subdev.mutex); + return iobj->map; + } -static u32 -nv50_instobj_rd32(struct nvkm_memory *memory, u64 offset) -{ - struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_device *device = imem->base.subdev.device; - u64 base = (iobj->mem->offset + offset) & 0xffffff00000ULL; - u64 addr = (iobj->mem->offset + offset) & 0x000000fffffULL; - u32 data; + /* Attempt to get a direct CPU mapping of the object. */ + if ((vmm = nvkm_bar_bar2_vmm(imem->subdev.device))) { + if (!iobj->map) + nv50_instobj_kmap(iobj, vmm); + map = iobj->map; + } - if (unlikely(imem->addr != base)) { - nvkm_wr32(device, 0x001700, base >> 16); - imem->addr = base; + if (!refcount_inc_not_zero(&iobj->maps)) { + /* Exclude object from eviction while it's being accessed. 
*/ + if (likely(iobj->lru.next)) + list_del_init(&iobj->lru); + + if (map) + iobj->base.memory.ptrs = &nv50_instobj_fast; + else + iobj->base.memory.ptrs = &nv50_instobj_slow; + refcount_inc(&iobj->maps); } - data = nvkm_rd32(device, 0x700000 + addr); - return data; + + mutex_unlock(&imem->subdev.mutex); + return map; } static void -nv50_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) +nv50_instobj_boot(struct nvkm_memory *memory, struct nvkm_vmm *vmm) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_device *device = imem->base.subdev.device; - u64 base = (iobj->mem->offset + offset) & 0xffffff00000ULL; - u64 addr = (iobj->mem->offset + offset) & 0x000000fffffULL; - - if (unlikely(imem->addr != base)) { - nvkm_wr32(device, 0x001700, base >> 16); - imem->addr = base; + struct nvkm_instmem *imem = &iobj->imem->base; + + /* Exclude bootstrapped objects (ie. the page tables for the + * instmem BAR itself) from eviction. + */ + mutex_lock(&imem->subdev.mutex); + if (likely(iobj->lru.next)) { + list_del_init(&iobj->lru); + iobj->lru.next = NULL; } - nvkm_wr32(device, 0x700000 + addr, data); + + nv50_instobj_kmap(iobj, vmm); + nvkm_instmem_boot(imem); + mutex_unlock(&imem->subdev.mutex); } -static void -nv50_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) +static u64 +nv50_instobj_size(struct nvkm_memory *memory) { - struct nv50_instobj *iobj = nv50_instobj(memory); - nvkm_vm_map_at(vma, offset, iobj->mem); + return nvkm_memory_size(nv50_instobj(memory)->ram); +} + +static u64 +nv50_instobj_addr(struct nvkm_memory *memory) +{ + return nvkm_memory_addr(nv50_instobj(memory)->ram); +} + +static enum nvkm_memory_target +nv50_instobj_target(struct nvkm_memory *memory) +{ + return nvkm_memory_target(nv50_instobj(memory)->ram); } static void * nv50_instobj_dtor(struct nvkm_memory *memory) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nvkm_ram *ram = iobj->imem->base.subdev.device->fb->ram; - if (!IS_ERR_OR_NULL(iobj->map)) { - nvkm_vm_put(&iobj->bar); - iounmap(iobj->map); + struct nvkm_instmem *imem = &iobj->imem->base; + struct nvkm_vma *bar; + void *map = map; + + mutex_lock(&imem->subdev.mutex); + if (likely(iobj->lru.next)) + list_del(&iobj->lru); + map = iobj->map; + bar = iobj->bar; + mutex_unlock(&imem->subdev.mutex); + + if (map) { + struct nvkm_vmm *vmm = nvkm_bar_bar2_vmm(imem->subdev.device); + iounmap(map); + if (likely(vmm)) /* Can be NULL during BAR destructor. 
*/ + nvkm_vmm_put(vmm, &bar); } - ram->func->put(ram, &iobj->mem); + + nvkm_memory_unref(&iobj->ram); + nvkm_instobj_dtor(imem, &iobj->base); return iobj; } @@ -184,8 +330,6 @@ nv50_instobj_func = { .boot = nv50_instobj_boot, .acquire = nv50_instobj_acquire, .release = nv50_instobj_release, - .rd32 = nv50_instobj_rd32, - .wr32 = nv50_instobj_wr32, .map = nv50_instobj_map, }; @@ -195,25 +339,19 @@ nv50_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, { struct nv50_instmem *imem = nv50_instmem(base); struct nv50_instobj *iobj; - struct nvkm_ram *ram = imem->base.subdev.device->fb->ram; - int ret; + struct nvkm_device *device = imem->base.subdev.device; + u8 page = max(order_base_2(align), 12); if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) return -ENOMEM; - *pmemory = &iobj->memory; + *pmemory = &iobj->base.memory; - nvkm_memory_ctor(&nv50_instobj_func, &iobj->memory); + nvkm_instobj_ctor(&nv50_instobj_func, &imem->base, &iobj->base); iobj->imem = imem; + refcount_set(&iobj->maps, 0); + INIT_LIST_HEAD(&iobj->lru); - size = max((size + 4095) & ~4095, (u32)4096); - align = max((align + 4095) & ~4095, (u32)4096); - - ret = ram->func->get(ram, size, align, 0, 0x800, &iobj->mem); - if (ret) - return ret; - - iobj->mem->page_shift = 12; - return 0; + return nvkm_ram_get(device, 0, 1, page, size, true, true, &iobj->ram); } /****************************************************************************** @@ -230,7 +368,6 @@ static const struct nvkm_instmem_func nv50_instmem = { .fini = nv50_instmem_fini, .memory_new = nv50_instobj_new, - .persistent = false, .zero = false, }; @@ -243,7 +380,7 @@ nv50_instmem_new(struct nvkm_device *device, int index, if (!(imem = kzalloc(sizeof(*imem), GFP_KERNEL))) return -ENOMEM; nvkm_instmem_ctor(&nv50_instmem, device, index, &imem->base); - spin_lock_init(&imem->lock); + INIT_LIST_HEAD(&imem->lru); *pimem = &imem->base; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h index ace4471864a3..b9e4751b9921 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_INSTMEM_PRIV_H__ #define __NVKM_INSTMEM_PRIV_H__ #define nvkm_instmem(p) container_of((p), struct nvkm_instmem, subdev) @@ -11,10 +12,22 @@ struct nvkm_instmem_func { void (*wr32)(struct nvkm_instmem *, u32 addr, u32 data); int (*memory_new)(struct nvkm_instmem *, u32 size, u32 align, bool zero, struct nvkm_memory **); - bool persistent; bool zero; }; void nvkm_instmem_ctor(const struct nvkm_instmem_func *, struct nvkm_device *, int index, struct nvkm_instmem *); +void nvkm_instmem_boot(struct nvkm_instmem *); + +#include <core/memory.h> + +struct nvkm_instobj { + struct nvkm_memory memory; + struct list_head head; + u32 *suspend; +}; + +void nvkm_instobj_ctor(const struct nvkm_memory_func *func, + struct nvkm_instmem *, struct nvkm_instobj *); +void nvkm_instobj_dtor(struct nvkm_instmem *, struct nvkm_instobj *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c index 0c7ef250dcaf..1f185274d3e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c @@ -23,26 +23,12 @@ */ #include "priv.h" -#include <subdev/fb.h> - -int -nvkm_ltc_tags_alloc(struct nvkm_ltc *ltc, u32 n, struct nvkm_mm_node **pnode) -{ - int ret = nvkm_mm_head(&ltc->tags, 0, 1, n, n, 1,
pnode); - if (ret) - *pnode = NULL; - return ret; -} - -void -nvkm_ltc_tags_free(struct nvkm_ltc *ltc, struct nvkm_mm_node **pnode) -{ - nvkm_mm_free(&ltc->tags, pnode); -} +#include <core/memory.h> void -nvkm_ltc_tags_clear(struct nvkm_ltc *ltc, u32 first, u32 count) +nvkm_ltc_tags_clear(struct nvkm_device *device, u32 first, u32 count) { + struct nvkm_ltc *ltc = device->ltc; const u32 limit = first + count - 1; BUG_ON((first > limit) || (limit >= ltc->num_tags)); @@ -116,10 +102,7 @@ static void * nvkm_ltc_dtor(struct nvkm_subdev *subdev) { struct nvkm_ltc *ltc = nvkm_ltc(subdev); - struct nvkm_ram *ram = ltc->subdev.device->fb->ram; - nvkm_mm_fini(&ltc->tags); - if (ram) - nvkm_mm_free(&ram->vram, &ltc->tag_ram); + nvkm_memory_unref(&ltc->tag_ram); return ltc; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c index 4a0fa0a9b802..a21ef45b8572 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c @@ -23,6 +23,7 @@ */ #include "priv.h" +#include <core/memory.h> #include <subdev/fb.h> #include <subdev/timer.h> @@ -152,7 +153,10 @@ gf100_ltc_flush(struct nvkm_ltc *ltc) int gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) { - struct nvkm_ram *ram = ltc->subdev.device->fb->ram; + struct nvkm_device *device = ltc->subdev.device; + struct nvkm_fb *fb = device->fb; + struct nvkm_ram *ram = fb->ram; + u32 bits = (nvkm_rd32(device, 0x100c80) & 0x00001000) ? 16 : 17; u32 tag_size, tag_margin, tag_align; int ret; @@ -164,8 +168,8 @@ gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) /* tags for 1/4 of VRAM should be enough (8192/4 per GiB of VRAM) */ ltc->num_tags = (ram->size >> 17) / 4; - if (ltc->num_tags > (1 << 17)) - ltc->num_tags = 1 << 17; /* we have 17 bits in PTE */ + if (ltc->num_tags > (1 << bits)) - ltc->num_tags = 1 << bits; /* we have 16/17 bits in PTE */ ltc->num_tags = (ltc->num_tags + 63) & ~63; /* round up to 64 */ tag_align = ltc->ltc_nr * 0x800; @@ -181,14 +185,13 @@ gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) */ tag_size = (ltc->num_tags / 64) * 0x6000 + tag_margin; tag_size += tag_align; - tag_size = (tag_size + 0xfff) >> 12; /* round up */ - ret = nvkm_mm_tail(&ram->vram, 1, 1, tag_size, tag_size, 1, - &ltc->tag_ram); + ret = nvkm_ram_get(device, NVKM_RAM_MM_NORMAL, 0x01, 12, tag_size, + true, true, &ltc->tag_ram); if (ret) { ltc->num_tags = 0; } else { - u64 tag_base = ((u64)ltc->tag_ram->offset << 12) + tag_margin; + u64 tag_base = nvkm_memory_addr(ltc->tag_ram) + tag_margin; tag_base += tag_align - 1; do_div(tag_base, tag_align); @@ -197,7 +200,8 @@ gf100_ltc_oneinit_tag_ram(struct nvkm_ltc *ltc) } mm_init: - return nvkm_mm_init(&ltc->tags, 0, ltc->num_tags, 1); + nvkm_mm_fini(&fb->tags); + return nvkm_mm_init(&fb->tags, 0, 0, ltc->num_tags, 1); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c index 0bdfb2f40266..e34d42108019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c @@ -45,7 +45,7 @@ gp100_ltc_oneinit(struct nvkm_ltc *ltc) ltc->ltc_nr = nvkm_rd32(device, 0x12006c); ltc->lts_nr = nvkm_rd32(device, 0x17e280) >> 28; /*XXX: tagram allocation - TBD */ - return nvkm_mm_init(&ltc->tags, 0, 0, 1); + return 0; } static void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h index 8b95f96e3ffa..e71cc25cc775 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h +++
b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_LTC_PRIV_H__ #define __NVKM_LTC_PRIV_H__ #define nvkm_ltc(p) container_of((p), struct nvkm_ltc, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h index 3be4126441e4..8869d79c2b59 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MC_PRIV_H__ #define __NVKM_MC_PRIV_H__ #define nvkm_mc(p) container_of((p), struct nvkm_mc, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild index 012c9db687b2..352a65f9371c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild @@ -3,4 +3,33 @@ nvkm-y += nvkm/subdev/mmu/nv04.o nvkm-y += nvkm/subdev/mmu/nv41.o nvkm-y += nvkm/subdev/mmu/nv44.o nvkm-y += nvkm/subdev/mmu/nv50.o +nvkm-y += nvkm/subdev/mmu/g84.o nvkm-y += nvkm/subdev/mmu/gf100.o +nvkm-y += nvkm/subdev/mmu/gk104.o +nvkm-y += nvkm/subdev/mmu/gk20a.o +nvkm-y += nvkm/subdev/mmu/gm200.o +nvkm-y += nvkm/subdev/mmu/gm20b.o +nvkm-y += nvkm/subdev/mmu/gp100.o +nvkm-y += nvkm/subdev/mmu/gp10b.o + +nvkm-y += nvkm/subdev/mmu/mem.o +nvkm-y += nvkm/subdev/mmu/memnv04.o +nvkm-y += nvkm/subdev/mmu/memnv50.o +nvkm-y += nvkm/subdev/mmu/memgf100.o + +nvkm-y += nvkm/subdev/mmu/vmm.o +nvkm-y += nvkm/subdev/mmu/vmmnv04.o +nvkm-y += nvkm/subdev/mmu/vmmnv41.o +nvkm-y += nvkm/subdev/mmu/vmmnv44.o +nvkm-y += nvkm/subdev/mmu/vmmnv50.o +nvkm-y += nvkm/subdev/mmu/vmmgf100.o +nvkm-y += nvkm/subdev/mmu/vmmgk104.o +nvkm-y += nvkm/subdev/mmu/vmmgk20a.o +nvkm-y += nvkm/subdev/mmu/vmmgm200.o +nvkm-y += nvkm/subdev/mmu/vmmgm20b.o +nvkm-y += nvkm/subdev/mmu/vmmgp100.o +nvkm-y += nvkm/subdev/mmu/vmmgp10b.o + +nvkm-y += nvkm/subdev/mmu/umem.o +nvkm-y += nvkm/subdev/mmu/ummu.o +nvkm-y += nvkm/subdev/mmu/uvmm.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index d06ad2c372bf..ee11ccaf0563 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -21,478 +21,367 @@ * * Authors: Ben Skeggs */ -#include "priv.h" +#include "ummu.h" +#include "vmm.h" -#include <core/gpuobj.h> +#include <subdev/bar.h> #include <subdev/fb.h> -void -nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node) -{ - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_mm_node *r = node->mem; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - u32 end, len; - - delta = 0; - while (r) { - u64 phys = (u64)r->offset << 12; - u32 num = r->length >> bits; - - while (num) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - - end = (pte + num); - if (unlikely(end >= max)) - end = max; - len = end - pte; - - mmu->func->map(vma, pgt, node, pte, len, phys, delta); - - num -= len; - pte += len; - if (unlikely(end >= max)) { - phys += len << (bits + 12); - pde++; - pte = 0; - } - - delta += (u64)len << vma->node->type; - } - r = r->next; - }; +#include <nvif/if500d.h> +#include <nvif/if900d.h> - mmu->func->flush(vm); -} +struct nvkm_mmu_ptp 
{ + struct nvkm_mmu_pt *pt; + struct list_head head; + u8 shift; + u16 mask; + u16 free; +}; static void -nvkm_vm_map_sg_table(struct nvkm_vma *vma, u64 delta, u64 length, - struct nvkm_mem *mem) +nvkm_mmu_ptp_put(struct nvkm_mmu *mmu, bool force, struct nvkm_mmu_pt *pt) { - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 num = length >> vma->node->type; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - unsigned m, sglen; - u32 end, len; - int i; - struct scatterlist *sg; - - for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - sglen = sg_dma_len(sg) >> PAGE_SHIFT; - - end = pte + sglen; - if (unlikely(end >= max)) - end = max; - len = end - pte; - - for (m = 0; m < len; m++) { - dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT); - - mmu->func->map_sg(vma, pgt, mem, pte, 1, &addr); - num--; - pte++; - - if (num == 0) - goto finish; - } - if (unlikely(end >= max)) { - pde++; - pte = 0; - } - if (m < sglen) { - for (; m < sglen; m++) { - dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT); - - mmu->func->map_sg(vma, pgt, mem, pte, 1, &addr); - num--; - pte++; - if (num == 0) - goto finish; - } - } - + const int slot = pt->base >> pt->ptp->shift; + struct nvkm_mmu_ptp *ptp = pt->ptp; + + /* If there were no free slots in the parent allocation before, + * there will be now, so return PTP to the cache. + */ + if (!ptp->free) + list_add(&ptp->head, &mmu->ptp.list); + ptp->free |= BIT(slot); + + /* If there's no more sub-allocations, destroy PTP. */ + if (ptp->free == ptp->mask) { + nvkm_mmu_ptc_put(mmu, force, &ptp->pt); + list_del(&ptp->head); + kfree(ptp); } -finish: - mmu->func->flush(vm); + + kfree(pt); } -static void -nvkm_vm_map_sg(struct nvkm_vma *vma, u64 delta, u64 length, - struct nvkm_mem *mem) +struct nvkm_mmu_pt * +nvkm_mmu_ptp_get(struct nvkm_mmu *mmu, u32 size, bool zero) { - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - dma_addr_t *list = mem->pages; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 num = length >> vma->node->type; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - u32 end, len; - - while (num) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - - end = (pte + num); - if (unlikely(end >= max)) - end = max; - len = end - pte; - - mmu->func->map_sg(vma, pgt, mem, pte, len, list); - - num -= len; - pte += len; - list += len; - if (unlikely(end >= max)) { - pde++; - pte = 0; + struct nvkm_mmu_pt *pt; + struct nvkm_mmu_ptp *ptp; + int slot; + + if (!(pt = kzalloc(sizeof(*pt), GFP_KERNEL))) + return NULL; + + ptp = list_first_entry_or_null(&mmu->ptp.list, typeof(*ptp), head); + if (!ptp) { + /* Need to allocate a new parent to sub-allocate from. 
*/ + if (!(ptp = kmalloc(sizeof(*ptp), GFP_KERNEL))) { + kfree(pt); + return NULL; } - } - mmu->func->flush(vm); -} + ptp->pt = nvkm_mmu_ptc_get(mmu, 0x1000, 0x1000, false); + if (!ptp->pt) { + kfree(ptp); + kfree(pt); + return NULL; + } -void -nvkm_vm_map(struct nvkm_vma *vma, struct nvkm_mem *node) -{ - if (node->sg) - nvkm_vm_map_sg_table(vma, 0, node->size << 12, node); - else - if (node->pages) - nvkm_vm_map_sg(vma, 0, node->size << 12, node); - else - nvkm_vm_map_at(vma, 0, node); + ptp->shift = order_base_2(size); + slot = nvkm_memory_size(ptp->pt->memory) >> ptp->shift; + ptp->mask = (1 << slot) - 1; + ptp->free = ptp->mask; + list_add(&ptp->head, &mmu->ptp.list); + } + pt->ptp = ptp; + pt->sub = true; + + /* Sub-allocate from parent object, removing PTP from cache + * if there's no more free slots left. + */ + slot = __ffs(ptp->free); + ptp->free &= ~BIT(slot); + if (!ptp->free) + list_del(&ptp->head); + + pt->memory = pt->ptp->pt->memory; + pt->base = slot << ptp->shift; + pt->addr = pt->ptp->pt->addr + pt->base; + return pt; } -void -nvkm_vm_unmap_at(struct nvkm_vma *vma, u64 delta, u64 length) +struct nvkm_mmu_ptc { + struct list_head head; + struct list_head item; + u32 size; + u32 refs; +}; + +static inline struct nvkm_mmu_ptc * +nvkm_mmu_ptc_find(struct nvkm_mmu *mmu, u32 size) { - struct nvkm_vm *vm = vma->vm; - struct nvkm_mmu *mmu = vm->mmu; - int big = vma->node->type != mmu->func->spg_shift; - u32 offset = vma->node->offset + (delta >> 12); - u32 bits = vma->node->type - 12; - u32 num = length >> vma->node->type; - u32 pde = (offset >> mmu->func->pgt_bits) - vm->fpde; - u32 pte = (offset & ((1 << mmu->func->pgt_bits) - 1)) >> bits; - u32 max = 1 << (mmu->func->pgt_bits - bits); - u32 end, len; - - while (num) { - struct nvkm_memory *pgt = vm->pgt[pde].mem[big]; - - end = (pte + num); - if (unlikely(end >= max)) - end = max; - len = end - pte; - - mmu->func->unmap(vma, pgt, pte, len); - - num -= len; - pte += len; - if (unlikely(end >= max)) { - pde++; - pte = 0; - } + struct nvkm_mmu_ptc *ptc; + + list_for_each_entry(ptc, &mmu->ptc.list, head) { + if (ptc->size == size) + return ptc; } - mmu->func->flush(vm); -} + ptc = kmalloc(sizeof(*ptc), GFP_KERNEL); + if (ptc) { + INIT_LIST_HEAD(&ptc->item); + ptc->size = size; + ptc->refs = 0; + list_add(&ptc->head, &mmu->ptc.list); + } -void -nvkm_vm_unmap(struct nvkm_vma *vma) -{ - nvkm_vm_unmap_at(vma, 0, (u64)vma->node->length << 12); + return ptc; } -static void -nvkm_vm_unmap_pgt(struct nvkm_vm *vm, int big, u32 fpde, u32 lpde) +void +nvkm_mmu_ptc_put(struct nvkm_mmu *mmu, bool force, struct nvkm_mmu_pt **ppt) { - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_vm_pgd *vpgd; - struct nvkm_vm_pgt *vpgt; - struct nvkm_memory *pgt; - u32 pde; - - for (pde = fpde; pde <= lpde; pde++) { - vpgt = &vm->pgt[pde - vm->fpde]; - if (--vpgt->refcount[big]) - continue; - - pgt = vpgt->mem[big]; - vpgt->mem[big] = NULL; - - list_for_each_entry(vpgd, &vm->pgd_list, head) { - mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); + struct nvkm_mmu_pt *pt = *ppt; + if (pt) { + /* Handle sub-allocated page tables. */ + if (pt->sub) { + mutex_lock(&mmu->ptp.mutex); + nvkm_mmu_ptp_put(mmu, force, pt); + mutex_unlock(&mmu->ptp.mutex); + return; } - nvkm_memory_del(&pgt); + /* Either cache or free the object. */ + mutex_lock(&mmu->ptc.mutex); + if (pt->ptc->refs < 8 /* Heuristic. 
*/ && !force) { + list_add_tail(&pt->head, &pt->ptc->item); + pt->ptc->refs++; + } else { + nvkm_memory_unref(&pt->memory); + kfree(pt); + } + mutex_unlock(&mmu->ptc.mutex); } } -static int -nvkm_vm_map_pgt(struct nvkm_vm *vm, u32 pde, u32 type) +struct nvkm_mmu_pt * +nvkm_mmu_ptc_get(struct nvkm_mmu *mmu, u32 size, u32 align, bool zero) { - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; - struct nvkm_vm_pgd *vpgd; - int big = (type != mmu->func->spg_shift); - u32 pgt_size; + struct nvkm_mmu_ptc *ptc; + struct nvkm_mmu_pt *pt; int ret; - pgt_size = (1 << (mmu->func->pgt_bits + 12)) >> type; - pgt_size *= 8; - - ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST, - pgt_size, 0x1000, true, &vpgt->mem[big]); - if (unlikely(ret)) - return ret; - - list_for_each_entry(vpgd, &vm->pgd_list, head) { - mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); + /* Sub-allocated page table (ie. GP100 LPT). */ + if (align < 0x1000) { + mutex_lock(&mmu->ptp.mutex); + pt = nvkm_mmu_ptp_get(mmu, align, zero); + mutex_unlock(&mmu->ptp.mutex); + return pt; } - vpgt->refcount[big]++; - return 0; -} - -int -nvkm_vm_get(struct nvkm_vm *vm, u64 size, u32 page_shift, u32 access, - struct nvkm_vma *vma) -{ - struct nvkm_mmu *mmu = vm->mmu; - u32 align = (1 << page_shift) >> 12; - u32 msize = size >> 12; - u32 fpde, lpde, pde; - int ret; - - mutex_lock(&vm->mutex); - ret = nvkm_mm_head(&vm->mm, 0, page_shift, msize, msize, align, - &vma->node); - if (unlikely(ret != 0)) { - mutex_unlock(&vm->mutex); - return ret; + /* Lookup cache for this page table size. */ + mutex_lock(&mmu->ptc.mutex); + ptc = nvkm_mmu_ptc_find(mmu, size); + if (!ptc) { + mutex_unlock(&mmu->ptc.mutex); + return NULL; } - fpde = (vma->node->offset >> mmu->func->pgt_bits); - lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits; - - for (pde = fpde; pde <= lpde; pde++) { - struct nvkm_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; - int big = (vma->node->type != mmu->func->spg_shift); + /* If there's a free PT in the cache, reuse it. */ + pt = list_first_entry_or_null(&ptc->item, typeof(*pt), head); + if (pt) { + if (zero) + nvkm_fo64(pt->memory, 0, 0, size >> 3); + list_del(&pt->head); + ptc->refs--; + mutex_unlock(&mmu->ptc.mutex); + return pt; + } + mutex_unlock(&mmu->ptc.mutex); - if (likely(vpgt->refcount[big])) { - vpgt->refcount[big]++; - continue; - } + /* No such luck, we need to allocate. 
*/ + if (!(pt = kmalloc(sizeof(*pt), GFP_KERNEL))) + return NULL; + pt->ptc = ptc; + pt->sub = false; - ret = nvkm_vm_map_pgt(vm, pde, vma->node->type); - if (ret) { - if (pde != fpde) - nvkm_vm_unmap_pgt(vm, big, fpde, pde - 1); - nvkm_mm_free(&vm->mm, &vma->node); - mutex_unlock(&vm->mutex); - return ret; - } + ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST, + size, align, zero, &pt->memory); + if (ret) { + kfree(pt); + return NULL; } - mutex_unlock(&vm->mutex); - vma->vm = NULL; - nvkm_vm_ref(vm, &vma->vm, NULL); - vma->offset = (u64)vma->node->offset << 12; - vma->access = access; - return 0; + pt->base = 0; + pt->addr = nvkm_memory_addr(pt->memory); + return pt; } void -nvkm_vm_put(struct nvkm_vma *vma) +nvkm_mmu_ptc_dump(struct nvkm_mmu *mmu) { - struct nvkm_mmu *mmu; - struct nvkm_vm *vm; - u32 fpde, lpde; - - if (unlikely(vma->node == NULL)) - return; - vm = vma->vm; - mmu = vm->mmu; - - fpde = (vma->node->offset >> mmu->func->pgt_bits); - lpde = (vma->node->offset + vma->node->length - 1) >> mmu->func->pgt_bits; - - mutex_lock(&vm->mutex); - nvkm_vm_unmap_pgt(vm, vma->node->type != mmu->func->spg_shift, fpde, lpde); - nvkm_mm_free(&vm->mm, &vma->node); - mutex_unlock(&vm->mutex); - - nvkm_vm_ref(NULL, &vma->vm, NULL); -} - -int -nvkm_vm_boot(struct nvkm_vm *vm, u64 size) -{ - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_memory *pgt; - int ret; - - ret = nvkm_memory_new(mmu->subdev.device, NVKM_MEM_TARGET_INST, - (size >> mmu->func->spg_shift) * 8, 0x1000, true, &pgt); - if (ret == 0) { - vm->pgt[0].refcount[0] = 1; - vm->pgt[0].mem[0] = pgt; - nvkm_memory_boot(pgt, vm); + struct nvkm_mmu_ptc *ptc; + list_for_each_entry(ptc, &mmu->ptc.list, head) { + struct nvkm_mmu_pt *pt, *tt; + list_for_each_entry_safe(pt, tt, &ptc->item, head) { + nvkm_memory_unref(&pt->memory); + list_del(&pt->head); + kfree(pt); + } } - - return ret; } -int -nvkm_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset, - u32 block, struct lock_class_key *key, struct nvkm_vm **pvm) +static void +nvkm_mmu_ptc_fini(struct nvkm_mmu *mmu) { - static struct lock_class_key _key; - struct nvkm_vm *vm; - u64 mm_length = (offset + length) - mm_offset; - int ret; - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) - return -ENOMEM; - - __mutex_init(&vm->mutex, "&vm->mutex", key ? 
key : &_key); - INIT_LIST_HEAD(&vm->pgd_list); - vm->mmu = mmu; - kref_init(&vm->refcount); - vm->fpde = offset >> (mmu->func->pgt_bits + 12); - vm->lpde = (offset + length - 1) >> (mmu->func->pgt_bits + 12); - - vm->pgt = vzalloc((vm->lpde - vm->fpde + 1) * sizeof(*vm->pgt)); - if (!vm->pgt) { - kfree(vm); - return -ENOMEM; - } + struct nvkm_mmu_ptc *ptc, *ptct; - ret = nvkm_mm_init(&vm->mm, mm_offset >> 12, mm_length >> 12, - block >> 12); - if (ret) { - vfree(vm->pgt); - kfree(vm); - return ret; + list_for_each_entry_safe(ptc, ptct, &mmu->ptc.list, head) { + WARN_ON(!list_empty(&ptc->item)); + list_del(&ptc->head); + kfree(ptc); } - - *pvm = vm; - - return 0; } -int -nvkm_vm_new(struct nvkm_device *device, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *key, struct nvkm_vm **pvm) +static void +nvkm_mmu_ptc_init(struct nvkm_mmu *mmu) { - struct nvkm_mmu *mmu = device->mmu; - if (!mmu->func->create) - return -EINVAL; - return mmu->func->create(mmu, offset, length, mm_offset, key, pvm); + mutex_init(&mmu->ptc.mutex); + INIT_LIST_HEAD(&mmu->ptc.list); + mutex_init(&mmu->ptp.mutex); + INIT_LIST_HEAD(&mmu->ptp.list); } -static int -nvkm_vm_link(struct nvkm_vm *vm, struct nvkm_gpuobj *pgd) +static void +nvkm_mmu_type(struct nvkm_mmu *mmu, int heap, u8 type) { - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_vm_pgd *vpgd; - int i; - - if (!pgd) - return 0; - - vpgd = kzalloc(sizeof(*vpgd), GFP_KERNEL); - if (!vpgd) - return -ENOMEM; - - vpgd->obj = pgd; - - mutex_lock(&vm->mutex); - for (i = vm->fpde; i <= vm->lpde; i++) - mmu->func->map_pgt(pgd, i, vm->pgt[i - vm->fpde].mem); - list_add(&vpgd->head, &vm->pgd_list); - mutex_unlock(&vm->mutex); - return 0; + if (heap >= 0 && !WARN_ON(mmu->type_nr == ARRAY_SIZE(mmu->type))) { + mmu->type[mmu->type_nr].type = type | mmu->heap[heap].type; + mmu->type[mmu->type_nr].heap = heap; + mmu->type_nr++; + } } -static void -nvkm_vm_unlink(struct nvkm_vm *vm, struct nvkm_gpuobj *mpgd) +static int +nvkm_mmu_heap(struct nvkm_mmu *mmu, u8 type, u64 size) { - struct nvkm_vm_pgd *vpgd, *tmp; - - if (!mpgd) - return; - - mutex_lock(&vm->mutex); - list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { - if (vpgd->obj == mpgd) { - list_del(&vpgd->head); - kfree(vpgd); - break; + if (size) { + if (!WARN_ON(mmu->heap_nr == ARRAY_SIZE(mmu->heap))) { + mmu->heap[mmu->heap_nr].type = type; + mmu->heap[mmu->heap_nr].size = size; + return mmu->heap_nr++; } } - mutex_unlock(&vm->mutex); + return -EINVAL; } static void -nvkm_vm_del(struct kref *kref) +nvkm_mmu_host(struct nvkm_mmu *mmu) { - struct nvkm_vm *vm = container_of(kref, typeof(*vm), refcount); - struct nvkm_vm_pgd *vpgd, *tmp; - - list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { - nvkm_vm_unlink(vm, vpgd->obj); - } - - nvkm_mm_fini(&vm->mm); - vfree(vm->pgt); - kfree(vm); + struct nvkm_device *device = mmu->subdev.device; + u8 type = NVKM_MEM_KIND * !!mmu->func->kind_sys; + int heap; + + /* Non-mappable system memory. */ + heap = nvkm_mmu_heap(mmu, NVKM_MEM_HOST, ~0ULL); + nvkm_mmu_type(mmu, heap, type); + + /* Non-coherent, cached, system memory. + * + * Block-linear mappings of system memory must be done through + * BAR1, and cannot be supported on systems where we're unable + * to map BAR1 with write-combining. + */ + type |= NVKM_MEM_MAPPABLE; + if (!device->bar || device->bar->iomap_uncached) + nvkm_mmu_type(mmu, heap, type & ~NVKM_MEM_KIND); + else + nvkm_mmu_type(mmu, heap, type); + + /* Coherent, cached, system memory. 
+ * + * Unsupported on systems that aren't able to support snooped + * mappings, and also for block-linear mappings which must be + * done through BAR1. + */ + type |= NVKM_MEM_COHERENT; + if (device->func->cpu_coherent) + nvkm_mmu_type(mmu, heap, type & ~NVKM_MEM_KIND); + + /* Uncached system memory. */ + nvkm_mmu_type(mmu, heap, type |= NVKM_MEM_UNCACHED); } -int -nvkm_vm_ref(struct nvkm_vm *ref, struct nvkm_vm **ptr, struct nvkm_gpuobj *pgd) +static void +nvkm_mmu_vram(struct nvkm_mmu *mmu) { - if (ref) { - int ret = nvkm_vm_link(ref, pgd); - if (ret) - return ret; - - kref_get(&ref->refcount); - } + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_mm *mm = &device->fb->ram->vram; + const u32 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); + const u32 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); + const u32 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); + u8 type = NVKM_MEM_KIND * !!mmu->func->kind; + u8 heap = NVKM_MEM_VRAM; + int heapM, heapN, heapU; + + /* Mixed-memory doesn't support compression or display. */ + heapM = nvkm_mmu_heap(mmu, heap, sizeM << NVKM_RAM_MM_SHIFT); + + heap |= NVKM_MEM_COMP; + heap |= NVKM_MEM_DISP; + heapN = nvkm_mmu_heap(mmu, heap, sizeN << NVKM_RAM_MM_SHIFT); + heapU = nvkm_mmu_heap(mmu, heap, sizeU << NVKM_RAM_MM_SHIFT); + + /* Add non-mappable VRAM types first so that they're preferred + * over anything else. Mixed-memory will be slower than other + * heaps, it's prioritised last. + */ + nvkm_mmu_type(mmu, heapU, type); + nvkm_mmu_type(mmu, heapN, type); + nvkm_mmu_type(mmu, heapM, type); + + /* Add host memory types next, under the assumption that users + * wanting mappable memory want to use them as staging buffers + * or the like. + */ + nvkm_mmu_host(mmu); + + /* Mappable VRAM types go last, as they're basically the worst + * possible type to ask for unless there's no other choice. + */ + if (device->bar) { + /* Write-combined BAR1 access. */ + type |= NVKM_MEM_MAPPABLE; + if (!device->bar->iomap_uncached) { + nvkm_mmu_type(mmu, heapN, type); + nvkm_mmu_type(mmu, heapM, type); + } - if (*ptr) { - nvkm_vm_unlink(*ptr, pgd); - kref_put(&(*ptr)->refcount, nvkm_vm_del); + /* Uncached BAR1 access. */ + type |= NVKM_MEM_COHERENT; + type |= NVKM_MEM_UNCACHED; + nvkm_mmu_type(mmu, heapN, type); + nvkm_mmu_type(mmu, heapM, type); } - - *ptr = ref; - return 0; } static int nvkm_mmu_oneinit(struct nvkm_subdev *subdev) { struct nvkm_mmu *mmu = nvkm_mmu(subdev); - if (mmu->func->oneinit) - return mmu->func->oneinit(mmu); + + /* Determine available memory types. 
*/ + if (mmu->subdev.device->fb && mmu->subdev.device->fb->ram) + nvkm_mmu_vram(mmu); + else + nvkm_mmu_host(mmu); + + if (mmu->func->vmm.global) { + int ret = nvkm_vmm_new(subdev->device, 0, 0, NULL, 0, NULL, + "gart", &mmu->vmm); + if (ret) + return ret; + } + return 0; } @@ -509,8 +398,10 @@ static void * nvkm_mmu_dtor(struct nvkm_subdev *subdev) { struct nvkm_mmu *mmu = nvkm_mmu(subdev); - if (mmu->func->dtor) - return mmu->func->dtor(mmu); + + nvkm_vmm_unref(&mmu->vmm); + + nvkm_mmu_ptc_fini(mmu); return mmu; } @@ -527,9 +418,10 @@ nvkm_mmu_ctor(const struct nvkm_mmu_func *func, struct nvkm_device *device, { nvkm_subdev_ctor(&nvkm_mmu, device, index, &mmu->subdev); mmu->func = func; - mmu->limit = func->limit; mmu->dma_bits = func->dma_bits; - mmu->lpg_shift = func->lpg_shift; + nvkm_mmu_ptc_init(mmu); + mmu->user.ctor = nvkm_ummu_new; + mmu->user.base = func->mmu.user; } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/g84.c new file mode 100644 index 000000000000..8accda5a772b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/g84.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +g84_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, nv50_vmm_new, false, 0x0200 }, + .kind = nv50_mmu_kind, + .kind_sys = true, +}; + +int +g84_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&g84_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c index 7ac507c927bb..2d075246dc46 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c @@ -21,197 +21,65 @@ * * Authors: Ben Skeggs */ -#include "priv.h" +#include "mem.h" +#include "vmm.h" -#include <subdev/fb.h> -#include <subdev/ltc.h> -#include <subdev/timer.h> - -#include <core/gpuobj.h> +#include <nvif/class.h> /* Map from compressed to corresponding uncompressed storage type. * The value 0xff represents an invalid storage type. 
*/ -const u8 gf100_pte_storage_type_map[256] = -{ - 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */ - 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */ - 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */ - 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */ - 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27, - 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */ - 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */ - 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */ - 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, - 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */ - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3, - 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */ - 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, - 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */ - 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff, - 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */ - 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff -}; - - -static void -gf100_vm_map_pgt(struct nvkm_gpuobj *pgd, u32 index, struct nvkm_memory *pgt[2]) -{ - u32 pde[2] = { 0, 0 }; - - if (pgt[0]) - pde[1] = 0x00000001 | (nvkm_memory_addr(pgt[0]) >> 8); - if (pgt[1]) - pde[0] = 0x00000001 | (nvkm_memory_addr(pgt[1]) >> 8); - - nvkm_kmap(pgd); - nvkm_wo32(pgd, (index * 8) + 0, pde[0]); - nvkm_wo32(pgd, (index * 8) + 4, pde[1]); - nvkm_done(pgd); -} - -static inline u64 -gf100_vm_addr(struct nvkm_vma *vma, u64 phys, u32 memtype, u32 target) -{ - phys >>= 8; - - phys |= 0x00000001; /* present */ - if (vma->access & NV_MEM_ACCESS_SYS) - phys |= 0x00000002; - - phys |= ((u64)target << 32); - phys |= ((u64)memtype << 36); - return phys; -} - -static void -gf100_vm_map(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta) -{ - u64 next = 1 << (vma->node->type - 8); - - phys = gf100_vm_addr(vma, phys, mem->memtype, 0); - pte <<= 3; - - if (mem->tag) { - struct nvkm_ltc *ltc = vma->vm->mmu->subdev.device->ltc; - u32 tag = mem->tag->offset + (delta >> 17); - phys |= (u64)tag << (32 + 12); - next |= (u64)1 << (32 + 12); - nvkm_ltc_tags_clear(ltc, tag, cnt); - } - - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte + 0, lower_32_bits(phys)); - nvkm_wo32(pgt, pte + 4, upper_32_bits(phys)); - phys += next; - pte += 8; - } - nvkm_done(pgt); -} - -static void -gf100_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - u32 target = (vma->access & NV_MEM_ACCESS_NOSNOOP) ? 
7 : 5; - /* compressed storage types are invalid for system memory */ - u32 memtype = gf100_pte_storage_type_map[mem->memtype & 0xff]; - - nvkm_kmap(pgt); - pte <<= 3; - while (cnt--) { - u64 phys = gf100_vm_addr(vma, *list++, memtype, target); - nvkm_wo32(pgt, pte + 0, lower_32_bits(phys)); - nvkm_wo32(pgt, pte + 4, upper_32_bits(phys)); - pte += 8; - } - nvkm_done(pgt); -} - -static void -gf100_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - nvkm_kmap(pgt); - pte <<= 3; - while (cnt--) { - nvkm_wo32(pgt, pte + 0, 0x00000000); - nvkm_wo32(pgt, pte + 4, 0x00000000); - pte += 8; - } - nvkm_done(pgt); -} - -static void -gf100_vm_flush(struct nvkm_vm *vm) -{ - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_device *device = mmu->subdev.device; - struct nvkm_vm_pgd *vpgd; - u32 type; - - type = 0x00000001; /* PAGE_ALL */ - if (atomic_read(&vm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ - - mutex_lock(&mmu->subdev.mutex); - list_for_each_entry(vpgd, &vm->pgd_list, head) { - /* looks like maybe a "free flush slots" counter, the - * faster you write to 0x100cbc to more it decreases - */ - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100c80) & 0x00ff0000) - break; - ); - - nvkm_wr32(device, 0x100cb8, vpgd->obj->addr >> 8); - nvkm_wr32(device, 0x100cbc, 0x80000000 | type); - - /* wait for flush to be queued? */ - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100c80) & 0x00008000) - break; - ); - } - mutex_unlock(&mmu->subdev.mutex); -} - -static int -gf100_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *key, struct nvkm_vm **pvm) +const u8 * +gf100_mmu_kind(struct nvkm_mmu *mmu, int *count) { - return nvkm_vm_create(mmu, offset, length, mm_offset, 4096, key, pvm); + static const u8 + kind[256] = { + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */ + 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */ + 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */ + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */ + 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27, + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */ + 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */ + 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */ + 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, + 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */ + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3, + 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */ + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, + 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */ + 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */ + 
0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff + }; + + *count = ARRAY_SIZE(kind); + return kind; } static const struct nvkm_mmu_func gf100_mmu = { - .limit = (1ULL << 40), .dma_bits = 40, - .pgt_bits = 27 - 12, - .spg_shift = 12, - .lpg_shift = 17, - .create = gf100_vm_create, - .map_pgt = gf100_vm_map_pgt, - .map = gf100_vm_map, - .map_sg = gf100_vm_map_sg, - .unmap = gf100_vm_unmap, - .flush = gf100_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GF100}, gf100_vmm_new }, + .kind = gf100_mmu_kind, + .kind_sys = true, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk104.c new file mode 100644 index 000000000000..3d7d1eb1cff9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk104.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gk104_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GF100}, gk104_vmm_new }, + .kind = gf100_mmu_kind, + .kind_sys = true, +}; + +int +gk104_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&gk104_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c new file mode 100644 index 000000000000..ac74965a60d4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gk20a_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GF100}, gk20a_vmm_new }, + .kind = gf100_mmu_kind, + .kind_sys = true, +}; + +int +gk20a_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&gk20a_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm200.c new file mode 100644 index 000000000000..dbf644ebac97 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm200.c @@ -0,0 +1,97 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "mem.h" +#include "vmm.h" + +#include <subdev/fb.h> + +#include <nvif/class.h> + +const u8 * +gm200_mmu_kind(struct nvkm_mmu *mmu, int *count) +{ + static const u8 + kind[256] = { + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */ + 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */ + 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */ + 0x28, 0x29, 0x2a, 0x2b, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */ + 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27, + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */ + 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */ + 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */ + 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, + 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */ + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3, + 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */ + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, + 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */ + 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */ + 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff + }; + *count = ARRAY_SIZE(kind); + return kind; +} + +static const struct nvkm_mmu_func +gm200_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GM200}, gm200_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +static const struct nvkm_mmu_func +gm200_mmu_fixed = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GM200}, gm200_vmm_new_fixed }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +int +gm200_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (device->fb->page) + return nvkm_mmu_new_(&gm200_mmu_fixed, device, index, pmmu); + return nvkm_mmu_new_(&gm200_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c new file mode 100644 index 000000000000..7353a94b4091 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c @@ -0,0 +1,55 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <subdev/fb.h> + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gm20b_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GM200}, gm20b_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +static const struct nvkm_mmu_func +gm20b_mmu_fixed = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GM200}, gm20b_vmm_new_fixed }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +int +gm20b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (device->fb->page) + return nvkm_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); + return nvkm_mmu_new_(&gm20b_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c index 9df1fea8e971..651b8805c67c 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2017 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,29 +19,27 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ +#include "mem.h" +#include "vmm.h" -/* - * radeon_kfd.h defines the private interface between the - * AMD kernel graphics drivers and the AMD KFD. 
- */ - -#ifndef RADEON_KFD_H_INCLUDED -#define RADEON_KFD_H_INCLUDED - -#include <linux/types.h> -#include "kgd_kfd_interface.h" - -struct radeon_device; +#include <core/option.h> -int radeon_kfd_init(void); -void radeon_kfd_fini(void); +#include <nvif/class.h> -void radeon_kfd_suspend(struct radeon_device *rdev); -int radeon_kfd_resume(struct radeon_device *rdev); -void radeon_kfd_interrupt(struct radeon_device *rdev, - const void *ih_ring_entry); -void radeon_kfd_device_probe(struct radeon_device *rdev); -void radeon_kfd_device_init(struct radeon_device *rdev); -void radeon_kfd_device_fini(struct radeon_device *rdev); +static const struct nvkm_mmu_func +gp100_mmu = { + .dma_bits = 47, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp100_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; -#endif /* RADEON_KFD_H_INCLUDED */ +int +gp100_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true)) + return gm200_mmu_new(device, index, pmmu); + return nvkm_mmu_new_(&gp100_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b. b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b. new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b. diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c new file mode 100644 index 000000000000..3bd3db31e0bb --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c @@ -0,0 +1,45 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "mem.h" +#include "vmm.h" + +#include <core/option.h> + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gp10b_mmu = { + .dma_bits = 47, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +int +gp10b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true)) + return gm20b_mmu_new(device, index, pmmu); + return nvkm_mmu_new_(&gp10b_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c new file mode 100644 index 000000000000..39808489f21d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c @@ -0,0 +1,242 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#define nvkm_mem(p) container_of((p), struct nvkm_mem, memory) +#include "mem.h" + +#include <core/memory.h> + +#include <nvif/if000a.h> +#include <nvif/unpack.h> + +struct nvkm_mem { + struct nvkm_memory memory; + enum nvkm_memory_target target; + struct nvkm_mmu *mmu; + u64 pages; + struct page **mem; + union { + struct scatterlist *sgl; + dma_addr_t *dma; + }; +}; + +static enum nvkm_memory_target +nvkm_mem_target(struct nvkm_memory *memory) +{ + return nvkm_mem(memory)->target; +} + +static u8 +nvkm_mem_page(struct nvkm_memory *memory) +{ + return PAGE_SHIFT; +} + +static u64 +nvkm_mem_addr(struct nvkm_memory *memory) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + if (mem->pages == 1 && mem->mem) + return mem->dma[0]; + return ~0ULL; +} + +static u64 +nvkm_mem_size(struct nvkm_memory *memory) +{ + return nvkm_mem(memory)->pages << PAGE_SHIFT; +} + +static int +nvkm_mem_map_dma(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + struct nvkm_vmm_map map = { + .memory = &mem->memory, + .offset = offset, + .dma = mem->dma, + }; + return nvkm_vmm_map(vmm, vma, argv, argc, &map); +} + +static void * +nvkm_mem_dtor(struct nvkm_memory *memory) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + if (mem->mem) { + while (mem->pages--) { + dma_unmap_page(mem->mmu->subdev.device->dev, + mem->dma[mem->pages], PAGE_SIZE, + DMA_BIDIRECTIONAL); + __free_page(mem->mem[mem->pages]); + } + kvfree(mem->dma); + kvfree(mem->mem); + } + return mem; +} + +static const struct nvkm_memory_func +nvkm_mem_dma = { + .dtor = nvkm_mem_dtor, + .target = nvkm_mem_target, + .page = nvkm_mem_page, + .addr = nvkm_mem_addr, + .size = nvkm_mem_size, + .map = nvkm_mem_map_dma, +}; + +static int +nvkm_mem_map_sgl(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + struct nvkm_vmm_map map = { + .memory = &mem->memory, + .offset = offset, + .sgl = mem->sgl, + }; + return nvkm_vmm_map(vmm, vma, argv, argc, &map); +} + +static const struct nvkm_memory_func +nvkm_mem_sgl = { + .dtor = nvkm_mem_dtor, + .target = nvkm_mem_target, + .page = nvkm_mem_page, + .addr = nvkm_mem_addr, + .size = nvkm_mem_size, + .map = nvkm_mem_map_sgl, +}; + +int +nvkm_mem_map_host(struct nvkm_memory *memory, void **pmap) +{ + struct nvkm_mem *mem = nvkm_mem(memory); + if (mem->mem) { + *pmap = vmap(mem->mem, mem->pages, VM_MAP, PAGE_KERNEL); + return *pmap ? 
0 : -EFAULT; + } + return -EINVAL; +} + +static int +nvkm_mem_new_host(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + struct device *dev = mmu->subdev.device->dev; + union { + struct nvif_mem_ram_vn vn; + struct nvif_mem_ram_v0 v0; + } *args = argv; + int ret = -ENOSYS; + enum nvkm_memory_target target; + struct nvkm_mem *mem; + gfp_t gfp = GFP_USER | __GFP_ZERO; + + if ( (mmu->type[type].type & NVKM_MEM_COHERENT) && + !(mmu->type[type].type & NVKM_MEM_UNCACHED)) + target = NVKM_MEM_TARGET_HOST; + else + target = NVKM_MEM_TARGET_NCOH; + + if (page != PAGE_SHIFT) + return -EINVAL; + + if (!(mem = kzalloc(sizeof(*mem), GFP_KERNEL))) + return -ENOMEM; + mem->target = target; + mem->mmu = mmu; + *pmemory = &mem->memory; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if (args->v0.dma) { + nvkm_memory_ctor(&nvkm_mem_dma, &mem->memory); + mem->dma = args->v0.dma; + } else { + nvkm_memory_ctor(&nvkm_mem_sgl, &mem->memory); + mem->sgl = args->v0.sgl; + } + + if (!IS_ALIGNED(size, PAGE_SIZE)) + return -EINVAL; + mem->pages = size >> PAGE_SHIFT; + return 0; + } else + if ( (ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + kfree(mem); + return ret; + } + + nvkm_memory_ctor(&nvkm_mem_dma, &mem->memory); + size = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; + + if (!(mem->mem = kvmalloc(sizeof(*mem->mem) * size, GFP_KERNEL))) + return -ENOMEM; + if (!(mem->dma = kvmalloc(sizeof(*mem->dma) * size, GFP_KERNEL))) + return -ENOMEM; + + if (mmu->dma_bits > 32) + gfp |= GFP_HIGHUSER; + else + gfp |= GFP_DMA32; + + for (mem->pages = 0; size; size--, mem->pages++) { + struct page *p = alloc_page(gfp); + if (!p) + return -ENOMEM; + + mem->dma[mem->pages] = dma_map_page(mmu->subdev.device->dev, + p, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, mem->dma[mem->pages])) { + __free_page(p); + return -ENOMEM; + } + + mem->mem[mem->pages] = p; + } + + return 0; +} + +int +nvkm_mem_new_type(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + struct nvkm_memory *memory = NULL; + int ret; + + if (mmu->type[type].type & NVKM_MEM_VRAM) { + ret = mmu->func->mem.vram(mmu, type, page, size, + argv, argc, &memory); + } else { + ret = nvkm_mem_new_host(mmu, type, page, size, + argv, argc, &memory); + } + + if (ret) + nvkm_memory_unref(&memory); + *pmemory = memory; + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.h new file mode 100644 index 000000000000..234267e1b215 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.h @@ -0,0 +1,23 @@ +#ifndef __NVKM_MEM_H__ +#define __NVKM_MEM_H__ +#include "priv.h" + +int nvkm_mem_new_type(struct nvkm_mmu *, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **); +int nvkm_mem_map_host(struct nvkm_memory *, void **pmap); + +int nv04_mem_new(struct nvkm_mmu *, int, u8, u64, void *, u32, + struct nvkm_memory **); +int nv04_mem_map(struct nvkm_mmu *, struct nvkm_memory *, void *, u32, + u64 *, u64 *, struct nvkm_vma **); + +int nv50_mem_new(struct nvkm_mmu *, int, u8, u64, void *, u32, + struct nvkm_memory **); +int nv50_mem_map(struct nvkm_mmu *, struct nvkm_memory *, void *, u32, + u64 *, u64 *, struct nvkm_vma **); + +int gf100_mem_new(struct nvkm_mmu *, int, u8, u64, void *, u32, + struct nvkm_memory **); +int gf100_mem_map(struct nvkm_mmu *, struct nvkm_memory *, void *, u32, + u64 *, u64 *, struct nvkm_vma **); +#endif 
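A minimal usage sketch of the interface introduced by mem.c/mem.h above, not part of the patch: nvkm_mem_new_type() routes VRAM types to the per-chipset mem.vram constructor and everything else to the generic host allocator, and nvkm_mem_map_host() then gives a kernel virtual mapping of pages that mem.c allocated itself. The helper name below is made up for illustration, and passing argv=NULL/argc=0 to take the no-arguments ("vn") unpack path in nvkm_mem_new_host() is an assumption based on the unpack/unvers sequence shown in the hunk.

    #include "mem.h"
    #include <core/memory.h>

    /* Hypothetical helper: allocate "size" bytes of host-backed memory for
     * MMU memory type "type" and map it into the kernel with vmap().
     */
    static int
    example_mem_alloc_and_map(struct nvkm_mmu *mmu, int type, u64 size,
                              struct nvkm_memory **pmemory, void **pmap)
    {
        int ret;

        /* The host path requires page == PAGE_SHIFT; with no allocation
         * arguments, nvkm_mem_new_host() allocates and DMA-maps the pages
         * itself rather than importing a caller-provided dma/sgl list.
         */
        ret = nvkm_mem_new_type(mmu, type, PAGE_SHIFT, size, NULL, 0, pmemory);
        if (ret)
            return ret;

        /* Only valid for host memory whose pages mem.c allocated itself;
         * for imported or VRAM-backed objects this returns -EINVAL.
         */
        ret = nvkm_mem_map_host(*pmemory, pmap);
        if (ret)
            nvkm_memory_unref(pmemory);
        return ret;
    }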
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c new file mode 100644 index 000000000000..d9c9bee45222 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c @@ -0,0 +1,94 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" + +#include <core/memory.h> +#include <subdev/bar.h> +#include <subdev/fb.h> + +#include <nvif/class.h> +#include <nvif/if900b.h> +#include <nvif/if900d.h> +#include <nvif/unpack.h> + +int +gf100_mem_map(struct nvkm_mmu *mmu, struct nvkm_memory *memory, void *argv, + u32 argc, u64 *paddr, u64 *psize, struct nvkm_vma **pvma) +{ + struct gf100_vmm_map_v0 uvmm = {}; + union { + struct gf100_mem_map_vn vn; + struct gf100_mem_map_v0 v0; + } *args = argv; + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + uvmm.ro = args->v0.ro; + uvmm.kind = args->v0.kind; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + } else + return ret; + + ret = nvkm_vmm_get(bar, nvkm_memory_page(memory), + nvkm_memory_size(memory), pvma); + if (ret) + return ret; + + ret = nvkm_memory_map(memory, 0, bar, *pvma, &uvmm, sizeof(uvmm)); + if (ret) + return ret; + + *paddr = device->func->resource_addr(device, 1) + (*pvma)->addr; + *psize = (*pvma)->size; + return 0; +} + +int +gf100_mem_new(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + union { + struct gf100_mem_vn vn; + struct gf100_mem_v0 v0; + } *args = argv; + int ret = -ENOSYS; + bool contig; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + contig = args->v0.contig; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + contig = false; + } else + return ret; + + if (mmu->type[type].type & (NVKM_MEM_DISP | NVKM_MEM_COMP)) + type = NVKM_RAM_MM_NORMAL; + else + type = NVKM_RAM_MM_MIXED; + + return nvkm_ram_get(mmu->subdev.device, type, 0x01, page, + size, contig, false, pmemory); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c new file mode 100644 index 000000000000..79a3b0cc9f5b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c @@ -0,0 +1,69 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" + +#include <core/memory.h> +#include <subdev/fb.h> + +#include <nvif/if000b.h> +#include <nvif/unpack.h> + +int +nv04_mem_map(struct nvkm_mmu *mmu, struct nvkm_memory *memory, void *argv, + u32 argc, u64 *paddr, u64 *psize, struct nvkm_vma **pvma) +{ + union { + struct nv04_mem_map_vn vn; + } *args = argv; + struct nvkm_device *device = mmu->subdev.device; + const u64 addr = nvkm_memory_addr(memory); + int ret = -ENOSYS; + + if ((ret = nvif_unvers(ret, &argv, &argc, args->vn))) + return ret; + + *paddr = device->func->resource_addr(device, 1) + addr; + *psize = nvkm_memory_size(memory); + *pvma = ERR_PTR(-ENODEV); + return 0; +} + +int +nv04_mem_new(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + union { + struct nv04_mem_vn vn; + } *args = argv; + int ret = -ENOSYS; + + if ((ret = nvif_unvers(ret, &argv, &argc, args->vn))) + return ret; + + if (mmu->type[type].type & NVKM_MEM_MAPPABLE) + type = NVKM_RAM_MM_NORMAL; + else + type = NVKM_RAM_MM_NOMAP; + + return nvkm_ram_get(mmu->subdev.device, type, 0x01, page, + size, true, false, pmemory); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c new file mode 100644 index 000000000000..46759b89fc1f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c @@ -0,0 +1,88 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" + +#include <core/memory.h> +#include <subdev/bar.h> +#include <subdev/fb.h> + +#include <nvif/class.h> +#include <nvif/if500b.h> +#include <nvif/if500d.h> +#include <nvif/unpack.h> + +int +nv50_mem_map(struct nvkm_mmu *mmu, struct nvkm_memory *memory, void *argv, + u32 argc, u64 *paddr, u64 *psize, struct nvkm_vma **pvma) +{ + struct nv50_vmm_map_v0 uvmm = {}; + union { + struct nv50_mem_map_vn vn; + struct nv50_mem_map_v0 v0; + } *args = argv; + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); + u64 size = nvkm_memory_size(memory); + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + uvmm.ro = args->v0.ro; + uvmm.kind = args->v0.kind; + uvmm.comp = args->v0.comp; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + } else + return ret; + + ret = nvkm_vmm_get(bar, 12, size, pvma); + if (ret) + return ret; + + *paddr = device->func->resource_addr(device, 1) + (*pvma)->addr; + *psize = (*pvma)->size; + return nvkm_memory_map(memory, 0, bar, *pvma, &uvmm, sizeof(uvmm)); +} + +int +nv50_mem_new(struct nvkm_mmu *mmu, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **pmemory) +{ + union { + struct nv50_mem_vn vn; + struct nv50_mem_v0 v0; + } *args = argv; + int ret = -ENOSYS; + bool contig; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + type = args->v0.bankswz ? 0x02 : 0x01; + contig = args->v0.contig; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + type = 0x01; + contig = false; + } else + return -ENOSYS; + + return nvkm_ram_get(mmu->subdev.device, NVKM_RAM_MM_NORMAL, type, + page, size, contig, false, pmemory); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c index 37927c3fdc3e..d201c887c2cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c @@ -21,129 +21,21 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> - -#define NV04_PDMA_SIZE (128 * 1024 * 1024) -#define NV04_PDMA_PAGE ( 4 * 1024) - -/******************************************************************************* - * VM map/unmap callbacks - ******************************************************************************/ - -static void -nv04_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - pte = 0x00008 + (pte * 4); - nvkm_kmap(pgt); - while (cnt) { - u32 page = PAGE_SIZE / NV04_PDMA_PAGE; - u32 phys = (u32)*list++; - while (cnt && page--) { - nvkm_wo32(pgt, pte, phys | 3); - phys += NV04_PDMA_PAGE; - pte += 4; - cnt -= 1; - } - } - nvkm_done(pgt); -} - -static void -nv04_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - pte = 0x00008 + (pte * 4); - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte, 0x00000000); - pte += 4; - } - nvkm_done(pgt); -} - -static void -nv04_vm_flush(struct nvkm_vm *vm) -{ -} - -/******************************************************************************* - * MMU subdev - 
******************************************************************************/ - -static int -nv04_mmu_oneinit(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - struct nvkm_memory *dma; - int ret; - - ret = nvkm_vm_create(&mmu->base, 0, NV04_PDMA_SIZE, 0, 4096, NULL, - &mmu->vm); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - (NV04_PDMA_SIZE / NV04_PDMA_PAGE) * 4 + 8, - 16, true, &dma); - mmu->vm->pgt[0].mem[0] = dma; - mmu->vm->pgt[0].refcount[0] = 1; - if (ret) - return ret; - - nvkm_kmap(dma); - nvkm_wo32(dma, 0x00000, 0x0002103d); /* PCI, RW, PT, !LN */ - nvkm_wo32(dma, 0x00004, NV04_PDMA_SIZE - 1); - nvkm_done(dma); - return 0; -} - -void * -nv04_mmu_dtor(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - if (mmu->vm) { - nvkm_memory_del(&mmu->vm->pgt[0].mem[0]); - nvkm_vm_ref(NULL, &mmu->vm, NULL); - } - if (mmu->nullp) { - dma_free_coherent(device->dev, 16 * 1024, - mmu->nullp, mmu->null); - } - return mmu; -} - -int -nv04_mmu_new_(const struct nvkm_mmu_func *func, struct nvkm_device *device, - int index, struct nvkm_mmu **pmmu) -{ - struct nv04_mmu *mmu; - if (!(mmu = kzalloc(sizeof(*mmu), GFP_KERNEL))) - return -ENOMEM; - *pmmu = &mmu->base; - nvkm_mmu_ctor(func, device, index, &mmu->base); - return 0; -} +#include <nvif/class.h> const struct nvkm_mmu_func nv04_mmu = { - .oneinit = nv04_mmu_oneinit, - .dtor = nv04_mmu_dtor, - .limit = NV04_PDMA_SIZE, .dma_bits = 32, - .pgt_bits = 32 - 12, - .spg_shift = 12, - .lpg_shift = 12, - .map_sg = nv04_vm_map_sg, - .unmap = nv04_vm_unmap, - .flush = nv04_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV04}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_NV04}, nv04_mem_new, nv04_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV04}, nv04_vmm_new, true }, }; int nv04_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) { - return nv04_mmu_new_(&nv04_mmu, device, index, pmmu); + return nvkm_mmu_new_(&nv04_mmu, device, index, pmmu); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.h deleted file mode 100644 index 363e33b296d5..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __NV04_MMU_PRIV__ -#define __NV04_MMU_PRIV__ -#define nv04_mmu(p) container_of((p), struct nv04_mmu, base) -#include "priv.h" - -struct nv04_mmu { - struct nvkm_mmu base; - struct nvkm_vm *vm; - dma_addr_t null; - void *nullp; -}; - -int nv04_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *, - int index, struct nvkm_mmu **); -void *nv04_mmu_dtor(struct nvkm_mmu *); - -extern const struct nvkm_mmu_func nv04_mmu; -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c index c6a26f907009..adca81895c09 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c @@ -21,113 +21,29 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> #include <core/option.h> -#include <subdev/timer.h> -#define NV41_GART_SIZE (512 * 1024 * 1024) -#define NV41_GART_PAGE ( 4 * 1024) - -/******************************************************************************* - * VM map/unmap callbacks - ******************************************************************************/ - -static void -nv41_vm_map_sg(struct nvkm_vma 
*vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - pte = pte * 4; - nvkm_kmap(pgt); - while (cnt) { - u32 page = PAGE_SIZE / NV41_GART_PAGE; - u64 phys = (u64)*list++; - while (cnt && page--) { - nvkm_wo32(pgt, pte, (phys >> 7) | 1); - phys += NV41_GART_PAGE; - pte += 4; - cnt -= 1; - } - } - nvkm_done(pgt); -} - -static void -nv41_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - pte = pte * 4; - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte, 0x00000000); - pte += 4; - } - nvkm_done(pgt); -} - -static void -nv41_vm_flush(struct nvkm_vm *vm) -{ - struct nv04_mmu *mmu = nv04_mmu(vm->mmu); - struct nvkm_device *device = mmu->base.subdev.device; - - mutex_lock(&mmu->base.subdev.mutex); - nvkm_wr32(device, 0x100810, 0x00000022); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100810) & 0x00000020) - break; - ); - nvkm_wr32(device, 0x100810, 0x00000000); - mutex_unlock(&mmu->base.subdev.mutex); -} - -/******************************************************************************* - * MMU subdev - ******************************************************************************/ - -static int -nv41_mmu_oneinit(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - int ret; - - ret = nvkm_vm_create(&mmu->base, 0, NV41_GART_SIZE, 0, 4096, NULL, - &mmu->vm); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - (NV41_GART_SIZE / NV41_GART_PAGE) * 4, 16, true, - &mmu->vm->pgt[0].mem[0]); - mmu->vm->pgt[0].refcount[0] = 1; - return ret; -} +#include <nvif/class.h> static void -nv41_mmu_init(struct nvkm_mmu *base) +nv41_mmu_init(struct nvkm_mmu *mmu) { - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - struct nvkm_memory *dma = mmu->vm->pgt[0].mem[0]; - nvkm_wr32(device, 0x100800, 0x00000002 | nvkm_memory_addr(dma)); + struct nvkm_device *device = mmu->subdev.device; + nvkm_wr32(device, 0x100800, 0x00000002 | mmu->vmm->pd->pt[0]->addr); nvkm_mask(device, 0x10008c, 0x00000100, 0x00000100); nvkm_wr32(device, 0x100820, 0x00000000); } static const struct nvkm_mmu_func nv41_mmu = { - .dtor = nv04_mmu_dtor, - .oneinit = nv41_mmu_oneinit, .init = nv41_mmu_init, - .limit = NV41_GART_SIZE, .dma_bits = 39, - .pgt_bits = 32 - 12, - .spg_shift = 12, - .lpg_shift = 12, - .map_sg = nv41_vm_map_sg, - .unmap = nv41_vm_unmap, - .flush = nv41_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV04}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_NV04}, nv04_mem_new, nv04_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV04}, nv41_vmm_new, true }, }; int @@ -137,5 +53,5 @@ nv41_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) !nvkm_boolopt(device->cfgopt, "NvPCIE", true)) return nv04_mmu_new(device, index, pmmu); - return nv04_mmu_new_(&nv41_mmu, device, index, pmmu); + return nvkm_mmu_new_(&nv41_mmu, device, index, pmmu); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c index a648c2395545..598c53a27bde 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c @@ -21,176 +21,18 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> #include <core/option.h> -#include <subdev/timer.h> -#define NV44_GART_SIZE (512 * 1024 * 1024) -#define NV44_GART_PAGE ( 4 * 1024) - 
-/******************************************************************************* - * VM map/unmap callbacks - ******************************************************************************/ - -static void -nv44_vm_fill(struct nvkm_memory *pgt, dma_addr_t null, - dma_addr_t *list, u32 pte, u32 cnt) -{ - u32 base = (pte << 2) & ~0x0000000f; - u32 tmp[4]; - - tmp[0] = nvkm_ro32(pgt, base + 0x0); - tmp[1] = nvkm_ro32(pgt, base + 0x4); - tmp[2] = nvkm_ro32(pgt, base + 0x8); - tmp[3] = nvkm_ro32(pgt, base + 0xc); - - while (cnt--) { - u32 addr = list ? (*list++ >> 12) : (null >> 12); - switch (pte++ & 0x3) { - case 0: - tmp[0] &= ~0x07ffffff; - tmp[0] |= addr; - break; - case 1: - tmp[0] &= ~0xf8000000; - tmp[0] |= addr << 27; - tmp[1] &= ~0x003fffff; - tmp[1] |= addr >> 5; - break; - case 2: - tmp[1] &= ~0xffc00000; - tmp[1] |= addr << 22; - tmp[2] &= ~0x0001ffff; - tmp[2] |= addr >> 10; - break; - case 3: - tmp[2] &= ~0xfffe0000; - tmp[2] |= addr << 17; - tmp[3] &= ~0x00000fff; - tmp[3] |= addr >> 15; - break; - } - } - - nvkm_wo32(pgt, base + 0x0, tmp[0]); - nvkm_wo32(pgt, base + 0x4, tmp[1]); - nvkm_wo32(pgt, base + 0x8, tmp[2]); - nvkm_wo32(pgt, base + 0xc, tmp[3] | 0x40000000); -} - -static void -nv44_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - struct nv04_mmu *mmu = nv04_mmu(vma->vm->mmu); - u32 tmp[4]; - int i; - - nvkm_kmap(pgt); - if (pte & 3) { - u32 max = 4 - (pte & 3); - u32 part = (cnt > max) ? max : cnt; - nv44_vm_fill(pgt, mmu->null, list, pte, part); - pte += part; - list += part; - cnt -= part; - } - - while (cnt >= 4) { - for (i = 0; i < 4; i++) - tmp[i] = *list++ >> 12; - nvkm_wo32(pgt, pte++ * 4, tmp[0] >> 0 | tmp[1] << 27); - nvkm_wo32(pgt, pte++ * 4, tmp[1] >> 5 | tmp[2] << 22); - nvkm_wo32(pgt, pte++ * 4, tmp[2] >> 10 | tmp[3] << 17); - nvkm_wo32(pgt, pte++ * 4, tmp[3] >> 15 | 0x40000000); - cnt -= 4; - } - - if (cnt) - nv44_vm_fill(pgt, mmu->null, list, pte, cnt); - nvkm_done(pgt); -} - -static void -nv44_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - struct nv04_mmu *mmu = nv04_mmu(vma->vm->mmu); - - nvkm_kmap(pgt); - if (pte & 3) { - u32 max = 4 - (pte & 3); - u32 part = (cnt > max) ? 
max : cnt; - nv44_vm_fill(pgt, mmu->null, NULL, pte, part); - pte += part; - cnt -= part; - } - - while (cnt >= 4) { - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - nvkm_wo32(pgt, pte++ * 4, 0x00000000); - cnt -= 4; - } - - if (cnt) - nv44_vm_fill(pgt, mmu->null, NULL, pte, cnt); - nvkm_done(pgt); -} - -static void -nv44_vm_flush(struct nvkm_vm *vm) -{ - struct nv04_mmu *mmu = nv04_mmu(vm->mmu); - struct nvkm_device *device = mmu->base.subdev.device; - nvkm_wr32(device, 0x100814, mmu->base.limit - NV44_GART_PAGE); - nvkm_wr32(device, 0x100808, 0x00000020); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100808) & 0x00000001) - break; - ); - nvkm_wr32(device, 0x100808, 0x00000000); -} - -/******************************************************************************* - * MMU subdev - ******************************************************************************/ - -static int -nv44_mmu_oneinit(struct nvkm_mmu *base) -{ - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - int ret; - - mmu->nullp = dma_alloc_coherent(device->dev, 16 * 1024, - &mmu->null, GFP_KERNEL); - if (!mmu->nullp) { - nvkm_warn(&mmu->base.subdev, "unable to allocate dummy pages\n"); - mmu->null = 0; - } - - ret = nvkm_vm_create(&mmu->base, 0, NV44_GART_SIZE, 0, 4096, NULL, - &mmu->vm); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - (NV44_GART_SIZE / NV44_GART_PAGE) * 4, - 512 * 1024, true, - &mmu->vm->pgt[0].mem[0]); - mmu->vm->pgt[0].refcount[0] = 1; - return ret; -} +#include <nvif/class.h> static void -nv44_mmu_init(struct nvkm_mmu *base) +nv44_mmu_init(struct nvkm_mmu *mmu) { - struct nv04_mmu *mmu = nv04_mmu(base); - struct nvkm_device *device = mmu->base.subdev.device; - struct nvkm_memory *gart = mmu->vm->pgt[0].mem[0]; + struct nvkm_device *device = mmu->subdev.device; + struct nvkm_memory *pt = mmu->vmm->pd->pt[0]->memory; u32 addr; /* calculate vram address of this PRAMIN block, object must be @@ -198,11 +40,11 @@ nv44_mmu_init(struct nvkm_mmu *base) * of 512KiB for this to work correctly */ addr = nvkm_rd32(device, 0x10020c); - addr -= ((nvkm_memory_addr(gart) >> 19) + 1) << 19; + addr -= ((nvkm_memory_addr(pt) >> 19) + 1) << 19; nvkm_wr32(device, 0x100850, 0x80000000); - nvkm_wr32(device, 0x100818, mmu->null); - nvkm_wr32(device, 0x100804, NV44_GART_SIZE); + nvkm_wr32(device, 0x100818, mmu->vmm->null); + nvkm_wr32(device, 0x100804, (nvkm_memory_size(pt) / 4) * 4096); nvkm_wr32(device, 0x100850, 0x00008000); nvkm_mask(device, 0x10008c, 0x00000200, 0x00000200); nvkm_wr32(device, 0x100820, 0x00000000); @@ -212,17 +54,11 @@ nv44_mmu_init(struct nvkm_mmu *base) static const struct nvkm_mmu_func nv44_mmu = { - .dtor = nv04_mmu_dtor, - .oneinit = nv44_mmu_oneinit, .init = nv44_mmu_init, - .limit = NV44_GART_SIZE, .dma_bits = 39, - .pgt_bits = 32 - 12, - .spg_shift = 12, - .lpg_shift = 12, - .map_sg = nv44_vm_map_sg, - .unmap = nv44_vm_unmap, - .flush = nv44_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV04}}, + .mem = {{ -1, -1, NVIF_CLASS_MEM_NV04}, nv04_mem_new, nv04_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV04}, nv44_vmm_new, true }, }; int @@ -232,5 +68,5 @@ nv44_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) !nvkm_boolopt(device->cfgopt, "NvPCIE", true)) return nv04_mmu_new(device, index, pmmu); - return nv04_mmu_new_(&nv44_mmu, device, index, pmmu); + return nvkm_mmu_new_(&nv44_mmu, device, index, pmmu); } diff --git 
a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c index a1f8d65f0276..db3dfbbb2aa0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c @@ -21,207 +21,52 @@ * * Authors: Ben Skeggs */ -#include "priv.h" +#include "mem.h" +#include "vmm.h" -#include <core/gpuobj.h> -#include <subdev/fb.h> -#include <subdev/timer.h> -#include <engine/gr.h> +#include <nvif/class.h> -static void -nv50_vm_map_pgt(struct nvkm_gpuobj *pgd, u32 pde, struct nvkm_memory *pgt[2]) +const u8 * +nv50_mmu_kind(struct nvkm_mmu *base, int *count) { - u64 phys = 0xdeadcafe00000000ULL; - u32 coverage = 0; - - if (pgt[0]) { - /* present, 4KiB pages */ - phys = 0x00000003 | nvkm_memory_addr(pgt[0]); - coverage = (nvkm_memory_size(pgt[0]) >> 3) << 12; - } else - if (pgt[1]) { - /* present, 64KiB pages */ - phys = 0x00000001 | nvkm_memory_addr(pgt[1]); - coverage = (nvkm_memory_size(pgt[1]) >> 3) << 16; - } - - if (phys & 1) { - if (coverage <= 32 * 1024 * 1024) - phys |= 0x60; - else if (coverage <= 64 * 1024 * 1024) - phys |= 0x40; - else if (coverage <= 128 * 1024 * 1024) - phys |= 0x20; - } - - nvkm_kmap(pgd); - nvkm_wo32(pgd, (pde * 8) + 0, lower_32_bits(phys)); - nvkm_wo32(pgd, (pde * 8) + 4, upper_32_bits(phys)); - nvkm_done(pgd); -} - -static inline u64 -vm_addr(struct nvkm_vma *vma, u64 phys, u32 memtype, u32 target) -{ - phys |= 1; /* present */ - phys |= (u64)memtype << 40; - phys |= target << 4; - if (vma->access & NV_MEM_ACCESS_SYS) - phys |= (1 << 6); - if (!(vma->access & NV_MEM_ACCESS_WO)) - phys |= (1 << 3); - return phys; -} - -static void -nv50_vm_map(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta) -{ - struct nvkm_ram *ram = vma->vm->mmu->subdev.device->fb->ram; - u32 comp = (mem->memtype & 0x180) >> 7; - u32 block, target; - int i; - - /* IGPs don't have real VRAM, re-target to stolen system memory */ - target = 0; - if (ram->stolen) { - phys += ram->stolen; - target = 3; - } - - phys = vm_addr(vma, phys, mem->memtype, target); - pte <<= 3; - cnt <<= 3; - - nvkm_kmap(pgt); - while (cnt) { - u32 offset_h = upper_32_bits(phys); - u32 offset_l = lower_32_bits(phys); - - for (i = 7; i >= 0; i--) { - block = 1 << (i + 3); - if (cnt >= block && !(pte & (block - 1))) - break; - } - offset_l |= (i << 7); - - phys += block << (vma->node->type - 3); - cnt -= block; - if (comp) { - u32 tag = mem->tag->offset + ((delta >> 16) * comp); - offset_h |= (tag << 17); - delta += block << (vma->node->type - 3); - } - - while (block) { - nvkm_wo32(pgt, pte + 0, offset_l); - nvkm_wo32(pgt, pte + 4, offset_h); - pte += 8; - block -= 8; - } - } - nvkm_done(pgt); -} - -static void -nv50_vm_map_sg(struct nvkm_vma *vma, struct nvkm_memory *pgt, - struct nvkm_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) -{ - u32 target = (vma->access & NV_MEM_ACCESS_NOSNOOP) ? 
3 : 2; - pte <<= 3; - nvkm_kmap(pgt); - while (cnt--) { - u64 phys = vm_addr(vma, (u64)*list++, mem->memtype, target); - nvkm_wo32(pgt, pte + 0, lower_32_bits(phys)); - nvkm_wo32(pgt, pte + 4, upper_32_bits(phys)); - pte += 8; - } - nvkm_done(pgt); -} - -static void -nv50_vm_unmap(struct nvkm_vma *vma, struct nvkm_memory *pgt, u32 pte, u32 cnt) -{ - pte <<= 3; - nvkm_kmap(pgt); - while (cnt--) { - nvkm_wo32(pgt, pte + 0, 0x00000000); - nvkm_wo32(pgt, pte + 4, 0x00000000); - pte += 8; - } - nvkm_done(pgt); -} - -static void -nv50_vm_flush(struct nvkm_vm *vm) -{ - struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_subdev *subdev = &mmu->subdev; - struct nvkm_device *device = subdev->device; - int i, vme; - - mutex_lock(&subdev->mutex); - for (i = 0; i < NVKM_SUBDEV_NR; i++) { - if (!atomic_read(&vm->engref[i])) - continue; - - /* unfortunate hw bug workaround... */ - if (i == NVKM_ENGINE_GR && device->gr) { - int ret = nvkm_gr_tlb_flush(device->gr); - if (ret != -ENODEV) - continue; - } - - switch (i) { - case NVKM_ENGINE_GR : vme = 0x00; break; - case NVKM_ENGINE_VP : - case NVKM_ENGINE_MSPDEC: vme = 0x01; break; - case NVKM_SUBDEV_BAR : vme = 0x06; break; - case NVKM_ENGINE_MSPPP : - case NVKM_ENGINE_MPEG : vme = 0x08; break; - case NVKM_ENGINE_BSP : - case NVKM_ENGINE_MSVLD : vme = 0x09; break; - case NVKM_ENGINE_CIPHER: - case NVKM_ENGINE_SEC : vme = 0x0a; break; - case NVKM_ENGINE_CE0 : vme = 0x0d; break; - default: - continue; - } - - nvkm_wr32(device, 0x100c80, (vme << 16) | 1); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x100c80) & 0x00000001)) - break; - ) < 0) - nvkm_error(subdev, "vm flush timeout: engine %d\n", vme); - } - mutex_unlock(&subdev->mutex); -} - -static int -nv50_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *key, struct nvkm_vm **pvm) -{ - u32 block = (1 << (mmu->func->pgt_bits + 12)); - if (block > length) - block = length; - - return nvkm_vm_create(mmu, offset, length, mm_offset, block, key, pvm); + /* 0x01: no bank swizzle + * 0x02: bank swizzled + * 0x7f: invalid + * + * 0x01/0x02 are values understood by the VRAM allocator, + * and are required to avoid mixing the two types within + * a certain range. 
+ */ + static const u8 + kind[128] = { + 0x01, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, /* 0x00 */ + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x7f, 0x7f, 0x7f, 0x7f, /* 0x10 */ + 0x02, 0x02, 0x02, 0x02, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7f, /* 0x20 */ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, /* 0x30 */ + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, /* 0x40 */ + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, 0x01, 0x01, 0x01, 0x7f, /* 0x50 */ + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7f, /* 0x60 */ + 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, + 0x01, 0x7f, 0x02, 0x7f, 0x01, 0x7f, 0x02, 0x7f, /* 0x70 */ + 0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x7f, 0x7f + }; + *count = ARRAY_SIZE(kind); + return kind; } static const struct nvkm_mmu_func nv50_mmu = { - .limit = (1ULL << 40), .dma_bits = 40, - .pgt_bits = 29 - 12, - .spg_shift = 12, - .lpg_shift = 16, - .create = nv50_vm_create, - .map_pgt = nv50_vm_map_pgt, - .map = nv50_vm_map, - .map_sg = nv50_vm_map_sg, - .unmap = nv50_vm_unmap, - .flush = nv50_vm_flush, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, nv50_vmm_new, false, 0x1400 }, + .kind = nv50_mmu_kind, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h index 27cedc60b507..948a48c21be4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MMU_PRIV_H__ #define __NVKM_MMU_PRIV_H__ #define nvkm_mmu(p) container_of((p), struct nvkm_mmu, subdev) @@ -9,31 +10,57 @@ int nvkm_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *, int index, struct nvkm_mmu **); struct nvkm_mmu_func { - void *(*dtor)(struct nvkm_mmu *); - int (*oneinit)(struct nvkm_mmu *); void (*init)(struct nvkm_mmu *); - u64 limit; u8 dma_bits; - u32 pgt_bits; - u8 spg_shift; - u8 lpg_shift; - - int (*create)(struct nvkm_mmu *, u64 offset, u64 length, u64 mm_offset, - struct lock_class_key *, struct nvkm_vm **); - - void (*map_pgt)(struct nvkm_gpuobj *pgd, u32 pde, - struct nvkm_memory *pgt[2]); - void (*map)(struct nvkm_vma *, struct nvkm_memory *, - struct nvkm_mem *, u32 pte, u32 cnt, - u64 phys, u64 delta); - void (*map_sg)(struct nvkm_vma *, struct nvkm_memory *, - struct nvkm_mem *, u32 pte, u32 cnt, dma_addr_t *); - void (*unmap)(struct nvkm_vma *, struct nvkm_memory *pgt, - u32 pte, u32 cnt); - void (*flush)(struct nvkm_vm *); + + struct { + struct nvkm_sclass user; + } mmu; + + struct { + struct nvkm_sclass user; + int (*vram)(struct nvkm_mmu *, int type, u8 page, u64 size, + void *argv, u32 argc, struct nvkm_memory **); + int (*umap)(struct nvkm_mmu *, struct nvkm_memory *, void *argv, + u32 argc, u64 *addr, u64 *size, struct nvkm_vma **); + } mem; + + struct { + struct nvkm_sclass user; + int (*ctor)(struct nvkm_mmu *, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *, + const char *name, struct nvkm_vmm **); + bool global; + u32 pd_offset; + } vmm; + + const u8 *(*kind)(struct nvkm_mmu *, int *count); + bool kind_sys; +}; + +extern const struct nvkm_mmu_func nv04_mmu; + +const u8 *nv50_mmu_kind(struct nvkm_mmu *, int *count); + 
+const u8 *gf100_mmu_kind(struct nvkm_mmu *, int *count); + +const u8 *gm200_mmu_kind(struct nvkm_mmu *, int *); + +struct nvkm_mmu_pt { + union { + struct nvkm_mmu_ptc *ptc; + struct nvkm_mmu_ptp *ptp; + }; + struct nvkm_memory *memory; + bool sub; + u16 base; + u64 addr; + struct list_head head; }; -int nvkm_vm_create(struct nvkm_mmu *, u64, u64, u64, u32, - struct lock_class_key *, struct nvkm_vm **); +void nvkm_mmu_ptc_dump(struct nvkm_mmu *); +struct nvkm_mmu_pt * +nvkm_mmu_ptc_get(struct nvkm_mmu *, u32 size, u32 align, bool zero); +void nvkm_mmu_ptc_put(struct nvkm_mmu *, bool force, struct nvkm_mmu_pt **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c new file mode 100644 index 000000000000..fac2f9a45ea6 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c @@ -0,0 +1,192 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "umem.h" +#include "ummu.h" + +#include <core/client.h> +#include <core/memory.h> +#include <subdev/bar.h> + +#include <nvif/class.h> +#include <nvif/if000a.h> +#include <nvif/unpack.h> + +static const struct nvkm_object_func nvkm_umem; +struct nvkm_memory * +nvkm_umem_search(struct nvkm_client *client, u64 handle) +{ + struct nvkm_client *master = client->object.client; + struct nvkm_memory *memory = NULL; + struct nvkm_object *object; + struct nvkm_umem *umem; + + object = nvkm_object_search(client, handle, &nvkm_umem); + if (IS_ERR(object)) { + if (client->super && client != master) { + spin_lock(&master->lock); + list_for_each_entry(umem, &master->umem, head) { + if (umem->object.object == handle) { + memory = nvkm_memory_ref(umem->memory); + break; + } + } + spin_unlock(&master->lock); + } + } else { + umem = nvkm_umem(object); + if (!umem->priv || client->super) + memory = nvkm_memory_ref(umem->memory); + } + + return memory ? 
memory : ERR_PTR(-ENOENT); +} + +static int +nvkm_umem_unmap(struct nvkm_object *object) +{ + struct nvkm_umem *umem = nvkm_umem(object); + + if (!umem->map) + return -EEXIST; + + if (umem->io) { + if (!IS_ERR(umem->bar)) { + struct nvkm_device *device = umem->mmu->subdev.device; + nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &umem->bar); + } else { + umem->bar = NULL; + } + } else { + vunmap(umem->map); + umem->map = NULL; + } + + return 0; +} + +static int +nvkm_umem_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *handle, u64 *length) +{ + struct nvkm_umem *umem = nvkm_umem(object); + struct nvkm_mmu *mmu = umem->mmu; + + if (!umem->mappable) + return -EINVAL; + if (umem->map) + return -EEXIST; + + if ((umem->type & NVKM_MEM_HOST) && !argc) { + int ret = nvkm_mem_map_host(umem->memory, &umem->map); + if (ret) + return ret; + + *handle = (unsigned long)(void *)umem->map; + *length = nvkm_memory_size(umem->memory); + *type = NVKM_OBJECT_MAP_VA; + return 0; + } else + if ((umem->type & NVKM_MEM_VRAM) || + (umem->type & NVKM_MEM_KIND)) { + int ret = mmu->func->mem.umap(mmu, umem->memory, argv, argc, + handle, length, &umem->bar); + if (ret) + return ret; + + *type = NVKM_OBJECT_MAP_IO; + } else { + return -EINVAL; + } + + umem->io = (*type == NVKM_OBJECT_MAP_IO); + return 0; +} + +static void * +nvkm_umem_dtor(struct nvkm_object *object) +{ + struct nvkm_umem *umem = nvkm_umem(object); + spin_lock(&umem->object.client->lock); + list_del_init(&umem->head); + spin_unlock(&umem->object.client->lock); + nvkm_memory_unref(&umem->memory); + return umem; +} + +static const struct nvkm_object_func +nvkm_umem = { + .dtor = nvkm_umem_dtor, + .map = nvkm_umem_map, + .unmap = nvkm_umem_unmap, +}; + +int +nvkm_umem_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + struct nvkm_mmu *mmu = nvkm_ummu(oclass->parent)->mmu; + union { + struct nvif_mem_v0 v0; + } *args = argv; + struct nvkm_umem *umem; + int type, ret = -ENOSYS; + u8 page; + u64 size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + type = args->v0.type; + page = args->v0.page; + size = args->v0.size; + } else + return ret; + + if (type >= mmu->type_nr) + return -EINVAL; + + if (!(umem = kzalloc(sizeof(*umem), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nvkm_umem, oclass, &umem->object); + umem->mmu = mmu; + umem->type = mmu->type[type].type; + umem->priv = oclass->client->super; + INIT_LIST_HEAD(&umem->head); + *pobject = &umem->object; + + if (mmu->type[type].type & NVKM_MEM_MAPPABLE) { + page = max_t(u8, page, PAGE_SHIFT); + umem->mappable = true; + } + + ret = nvkm_mem_new_type(mmu, type, page, size, argv, argc, + &umem->memory); + if (ret) + return ret; + + spin_lock(&umem->object.client->lock); + list_add(&umem->head, &umem->object.client->umem); + spin_unlock(&umem->object.client->lock); + + args->v0.page = nvkm_memory_page(umem->memory); + args->v0.addr = nvkm_memory_addr(umem->memory); + args->v0.size = nvkm_memory_size(umem->memory); + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h new file mode 100644 index 000000000000..85cf692d620a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h @@ -0,0 +1,26 @@ +#ifndef __NVKM_UMEM_H__ +#define __NVKM_UMEM_H__ +#define nvkm_umem(p) container_of((p), struct nvkm_umem, object) +#include <core/object.h> +#include "mem.h" + +struct nvkm_umem { + struct nvkm_object object; + struct nvkm_mmu 
*mmu; + u8 type:8; + bool priv:1; + bool mappable:1; + bool io:1; + + struct nvkm_memory *memory; + struct list_head head; + + union { + struct nvkm_vma *bar; + void *map; + }; +}; + +int nvkm_umem_new(const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c new file mode 100644 index 000000000000..353f10f92b77 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c @@ -0,0 +1,178 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ummu.h" +#include "umem.h" +#include "uvmm.h" + +#include <core/client.h> + +#include <nvif/if0008.h> +#include <nvif/unpack.h> + +static int +nvkm_ummu_sclass(struct nvkm_object *object, int index, + struct nvkm_oclass *oclass) +{ + struct nvkm_mmu *mmu = nvkm_ummu(object)->mmu; + + if (mmu->func->mem.user.oclass && oclass->client->super) { + if (index-- == 0) { + oclass->base = mmu->func->mem.user; + oclass->ctor = nvkm_umem_new; + return 0; + } + } + + if (mmu->func->vmm.user.oclass) { + if (index-- == 0) { + oclass->base = mmu->func->vmm.user; + oclass->ctor = nvkm_uvmm_new; + return 0; + } + } + + return -EINVAL; +} + +static int +nvkm_ummu_heap(struct nvkm_ummu *ummu, void *argv, u32 argc) +{ + struct nvkm_mmu *mmu = ummu->mmu; + union { + struct nvif_mmu_heap_v0 v0; + } *args = argv; + int ret = -ENOSYS; + u8 index; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if ((index = args->v0.index) >= mmu->heap_nr) + return -EINVAL; + args->v0.size = mmu->heap[index].size; + } else + return ret; + + return 0; +} + +static int +nvkm_ummu_type(struct nvkm_ummu *ummu, void *argv, u32 argc) +{ + struct nvkm_mmu *mmu = ummu->mmu; + union { + struct nvif_mmu_type_v0 v0; + } *args = argv; + int ret = -ENOSYS; + u8 type, index; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if ((index = args->v0.index) >= mmu->type_nr) + return -EINVAL; + type = mmu->type[index].type; + args->v0.heap = mmu->type[index].heap; + args->v0.vram = !!(type & NVKM_MEM_VRAM); + args->v0.host = !!(type & NVKM_MEM_HOST); + args->v0.comp = !!(type & NVKM_MEM_COMP); + args->v0.disp = !!(type & NVKM_MEM_DISP); + args->v0.kind = !!(type & NVKM_MEM_KIND); + args->v0.mappable = !!(type & NVKM_MEM_MAPPABLE); + args->v0.coherent = !!(type & NVKM_MEM_COHERENT); + args->v0.uncached = !!(type & NVKM_MEM_UNCACHED); + } else + 
return ret; + + return 0; +} + +static int +nvkm_ummu_kind(struct nvkm_ummu *ummu, void *argv, u32 argc) +{ + struct nvkm_mmu *mmu = ummu->mmu; + union { + struct nvif_mmu_kind_v0 v0; + } *args = argv; + const u8 *kind = NULL; + int ret = -ENOSYS, count = 0; + + if (mmu->func->kind) + kind = mmu->func->kind(mmu, &count); + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + if (argc != args->v0.count * sizeof(*args->v0.data)) + return -EINVAL; + if (args->v0.count > count) + return -EINVAL; + memcpy(args->v0.data, kind, args->v0.count); + } else + return ret; + + return 0; +} + +static int +nvkm_ummu_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) +{ + struct nvkm_ummu *ummu = nvkm_ummu(object); + switch (mthd) { + case NVIF_MMU_V0_HEAP: return nvkm_ummu_heap(ummu, argv, argc); + case NVIF_MMU_V0_TYPE: return nvkm_ummu_type(ummu, argv, argc); + case NVIF_MMU_V0_KIND: return nvkm_ummu_kind(ummu, argv, argc); + default: + break; + } + return -EINVAL; +} + +static const struct nvkm_object_func +nvkm_ummu = { + .mthd = nvkm_ummu_mthd, + .sclass = nvkm_ummu_sclass, +}; + +int +nvkm_ummu_new(struct nvkm_device *device, const struct nvkm_oclass *oclass, + void *argv, u32 argc, struct nvkm_object **pobject) +{ + union { + struct nvif_mmu_v0 v0; + } *args = argv; + struct nvkm_mmu *mmu = device->mmu; + struct nvkm_ummu *ummu; + int ret = -ENOSYS, kinds = 0; + + if (mmu->func->kind) + mmu->func->kind(mmu, &kinds); + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + args->v0.dmabits = mmu->dma_bits; + args->v0.heap_nr = mmu->heap_nr; + args->v0.type_nr = mmu->type_nr; + args->v0.kind_nr = kinds; + } else + return ret; + + if (!(ummu = kzalloc(sizeof(*ummu), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nvkm_ummu, oclass, &ummu->object); + ummu->mmu = mmu; + *pobject = &ummu->object; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.h new file mode 100644 index 000000000000..0cd510dcfc68 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.h @@ -0,0 +1,14 @@ +#ifndef __NVKM_UMMU_H__ +#define __NVKM_UMMU_H__ +#define nvkm_ummu(p) container_of((p), struct nvkm_ummu, object) +#include <core/object.h> +#include "priv.h" + +struct nvkm_ummu { + struct nvkm_object object; + struct nvkm_mmu *mmu; +}; + +int nvkm_ummu_new(struct nvkm_device *, const struct nvkm_oclass *, + void *argv, u32 argc, struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c new file mode 100644 index 000000000000..fa81d0c1ba41 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c @@ -0,0 +1,352 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "uvmm.h" +#include "umem.h" +#include "ummu.h" + +#include <core/client.h> +#include <core/memory.h> + +#include <nvif/if000c.h> +#include <nvif/unpack.h> + +static const struct nvkm_object_func nvkm_uvmm; +struct nvkm_vmm * +nvkm_uvmm_search(struct nvkm_client *client, u64 handle) +{ + struct nvkm_object *object; + + object = nvkm_object_search(client, handle, &nvkm_uvmm); + if (IS_ERR(object)) + return (void *)object; + + return nvkm_uvmm(object)->vmm; +} + +static int +nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_unmap_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + int ret = -ENOSYS; + u64 addr; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + addr = args->v0.addr; + } else + return ret; + + mutex_lock(&vmm->mutex); + vma = nvkm_vmm_node_search(vmm, addr); + if (ret = -ENOENT, !vma || vma->addr != addr) { + VMM_DEBUG(vmm, "lookup %016llx: %016llx", + addr, vma ? vma->addr : ~0ULL); + goto done; + } + + if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, + vma->user, !client->super, vma->busy); + goto done; + } + + if (ret = -EINVAL, !vma->memory) { + VMM_DEBUG(vmm, "unmapped"); + goto done; + } + + nvkm_vmm_unmap_locked(vmm, vma); + ret = 0; +done: + mutex_unlock(&vmm->mutex); + return ret; +} + +static int +nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_map_v0 v0; + } *args = argv; + u64 addr, size, handle, offset; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + struct nvkm_memory *memory; + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + addr = args->v0.addr; + size = args->v0.size; + handle = args->v0.memory; + offset = args->v0.offset; + } else + return ret; + + if (IS_ERR((memory = nvkm_umem_search(client, handle)))) { + VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, PTR_ERR(memory)); + return PTR_ERR(memory); + } + + mutex_lock(&vmm->mutex); + if (ret = -ENOENT, !(vma = nvkm_vmm_node_search(vmm, addr))) { + VMM_DEBUG(vmm, "lookup %016llx", addr); + goto fail; + } + + if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, + vma->user, !client->super, vma->busy); + goto fail; + } + + if (ret = -EINVAL, vma->addr != addr || vma->size != size) { + if (addr + size > vma->addr + vma->size || vma->memory || + (vma->refd == NVKM_VMA_PAGE_NONE && !vma->mapref)) { + VMM_DEBUG(vmm, "split %d %d %d " + "%016llx %016llx %016llx %016llx", + !!vma->memory, vma->refd, vma->mapref, + addr, size, vma->addr, (u64)vma->size); + goto fail; + } + + if (vma->addr != addr) { + const u64 tail = vma->size + vma->addr - addr; + if (ret = -ENOMEM, !(vma = nvkm_vma_tail(vma, tail))) + goto fail; + vma->part = true; + nvkm_vmm_node_insert(vmm, vma); + } + + 
if (vma->size != size) { + const u64 tail = vma->size - size; + struct nvkm_vma *tmp; + if (ret = -ENOMEM, !(tmp = nvkm_vma_tail(vma, tail))) { + nvkm_vmm_unmap_region(vmm, vma); + goto fail; + } + tmp->part = true; + nvkm_vmm_node_insert(vmm, tmp); + } + } + vma->busy = true; + mutex_unlock(&vmm->mutex); + + ret = nvkm_memory_map(memory, offset, vmm, vma, argv, argc); + if (ret == 0) { + /* Successful map will clear vma->busy. */ + nvkm_memory_unref(&memory); + return 0; + } + + mutex_lock(&vmm->mutex); + vma->busy = false; + nvkm_vmm_unmap_region(vmm, vma); +fail: + mutex_unlock(&vmm->mutex); + nvkm_memory_unref(&memory); + return ret; +} + +static int +nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_put_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + int ret = -ENOSYS; + u64 addr; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + addr = args->v0.addr; + } else + return ret; + + mutex_lock(&vmm->mutex); + vma = nvkm_vmm_node_search(vmm, args->v0.addr); + if (ret = -ENOENT, !vma || vma->addr != addr || vma->part) { + VMM_DEBUG(vmm, "lookup %016llx: %016llx %d", addr, + vma ? vma->addr : ~0ULL, vma ? vma->part : 0); + goto done; + } + + if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, + vma->user, !client->super, vma->busy); + goto done; + } + + nvkm_vmm_put_locked(vmm, vma); + ret = 0; +done: + mutex_unlock(&vmm->mutex); + return ret; +} + +static int +nvkm_uvmm_mthd_get(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_get_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + struct nvkm_vma *vma; + int ret = -ENOSYS; + bool getref, mapref, sparse; + u8 page, align; + u64 size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + getref = args->v0.type == NVIF_VMM_GET_V0_PTES; + mapref = args->v0.type == NVIF_VMM_GET_V0_ADDR; + sparse = args->v0.sparse; + page = args->v0.page; + align = args->v0.align; + size = args->v0.size; + } else + return ret; + + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_get_locked(vmm, getref, mapref, sparse, + page, align, size, &vma); + mutex_unlock(&vmm->mutex); + if (ret) + return ret; + + args->v0.addr = vma->addr; + vma->user = !client->super; + return ret; +} + +static int +nvkm_uvmm_mthd_page(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + union { + struct nvif_vmm_page_v0 v0; + } *args = argv; + const struct nvkm_vmm_page *page; + int ret = -ENOSYS; + u8 type, index, nr; + + page = uvmm->vmm->func->page; + for (nr = 0; page[nr].shift; nr++); + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + if ((index = args->v0.index) >= nr) + return -EINVAL; + type = page[index].type; + args->v0.shift = page[index].shift; + args->v0.sparse = !!(type & NVKM_VMM_PAGE_SPARSE); + args->v0.vram = !!(type & NVKM_VMM_PAGE_VRAM); + args->v0.host = !!(type & NVKM_VMM_PAGE_HOST); + args->v0.comp = !!(type & NVKM_VMM_PAGE_COMP); + } else + return -ENOSYS; + + return 0; +} + +static int +nvkm_uvmm_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) +{ + struct nvkm_uvmm *uvmm = nvkm_uvmm(object); + switch (mthd) { + case NVIF_VMM_V0_PAGE : return nvkm_uvmm_mthd_page (uvmm, argv, argc); + case NVIF_VMM_V0_GET : return nvkm_uvmm_mthd_get (uvmm, argv, argc); + case NVIF_VMM_V0_PUT : return 
nvkm_uvmm_mthd_put (uvmm, argv, argc); + case NVIF_VMM_V0_MAP : return nvkm_uvmm_mthd_map (uvmm, argv, argc); + case NVIF_VMM_V0_UNMAP : return nvkm_uvmm_mthd_unmap (uvmm, argv, argc); + default: + break; + } + return -EINVAL; +} + +static void * +nvkm_uvmm_dtor(struct nvkm_object *object) +{ + struct nvkm_uvmm *uvmm = nvkm_uvmm(object); + nvkm_vmm_unref(&uvmm->vmm); + return uvmm; +} + +static const struct nvkm_object_func +nvkm_uvmm = { + .dtor = nvkm_uvmm_dtor, + .mthd = nvkm_uvmm_mthd, +}; + +int +nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + struct nvkm_mmu *mmu = nvkm_ummu(oclass->parent)->mmu; + const bool more = oclass->base.maxver >= 0; + union { + struct nvif_vmm_v0 v0; + } *args = argv; + const struct nvkm_vmm_page *page; + struct nvkm_uvmm *uvmm; + int ret = -ENOSYS; + u64 addr, size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, more))) { + addr = args->v0.addr; + size = args->v0.size; + } else + return ret; + + if (!(uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nvkm_uvmm, oclass, &uvmm->object); + *pobject = &uvmm->object; + + if (!mmu->vmm) { + ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, + NULL, "user", &uvmm->vmm); + if (ret) + return ret; + + uvmm->vmm->debug = max(uvmm->vmm->debug, oclass->client->debug); + } else { + if (size) + return -EINVAL; + + uvmm->vmm = nvkm_vmm_ref(mmu->vmm); + } + + page = uvmm->vmm->func->page; + args->v0.page_nr = 0; + while (page && (page++)->shift) + args->v0.page_nr++; + args->v0.addr = uvmm->vmm->start; + args->v0.size = uvmm->vmm->limit; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h new file mode 100644 index 000000000000..71dab55e18a9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h @@ -0,0 +1,14 @@ +#ifndef __NVKM_UVMM_H__ +#define __NVKM_UVMM_H__ +#define nvkm_uvmm(p) container_of((p), struct nvkm_uvmm, object) +#include <core/object.h> +#include "vmm.h" + +struct nvkm_uvmm { + struct nvkm_object object; + struct nvkm_vmm *vmm; +}; + +int nvkm_uvmm_new(const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c new file mode 100644 index 000000000000..e35d3e17cd7c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -0,0 +1,1513 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define NVKM_VMM_LEVELS_MAX 5 +#include "vmm.h" + +#include <subdev/fb.h> + +static void +nvkm_vmm_pt_del(struct nvkm_vmm_pt **ppgt) +{ + struct nvkm_vmm_pt *pgt = *ppgt; + if (pgt) { + kvfree(pgt->pde); + kfree(pgt); + *ppgt = NULL; + } +} + + +static struct nvkm_vmm_pt * +nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse, + const struct nvkm_vmm_page *page) +{ + const u32 pten = 1 << desc->bits; + struct nvkm_vmm_pt *pgt; + u32 lpte = 0; + + if (desc->type > PGT) { + if (desc->type == SPT) { + const struct nvkm_vmm_desc *pair = page[-1].desc; + lpte = pten >> (desc->bits - pair->bits); + } else { + lpte = pten; + } + } + + if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL))) + return NULL; + pgt->page = page ? page->shift : 0; + pgt->sparse = sparse; + + if (desc->type == PGD) { + pgt->pde = kvzalloc(sizeof(*pgt->pde) * pten, GFP_KERNEL); + if (!pgt->pde) { + kfree(pgt); + return NULL; + } + } + + return pgt; +} + +struct nvkm_vmm_iter { + const struct nvkm_vmm_page *page; + const struct nvkm_vmm_desc *desc; + struct nvkm_vmm *vmm; + u64 cnt; + u16 max, lvl; + u32 pte[NVKM_VMM_LEVELS_MAX]; + struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX]; + int flush; +}; + +#ifdef CONFIG_NOUVEAU_DEBUG_MMU +static const char * +nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc) +{ + switch (desc->type) { + case PGD: return "PGD"; + case PGT: return "PGT"; + case SPT: return "SPT"; + case LPT: return "LPT"; + default: + return "UNKNOWN"; + } +} + +static void +nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf) +{ + int lvl; + for (lvl = it->max; lvl >= 0; lvl--) { + if (lvl >= it->lvl) + buf += sprintf(buf, "%05x:", it->pte[lvl]); + else + buf += sprintf(buf, "xxxxx:"); + } +} + +#define TRA(i,f,a...) do { \ + char _buf[NVKM_VMM_LEVELS_MAX * 7]; \ + struct nvkm_vmm_iter *_it = (i); \ + nvkm_vmm_trace(_it, _buf); \ + VMM_TRACE(_it->vmm, "%s "f, _buf, ##a); \ +} while(0) +#else +#define TRA(i,f,a...) +#endif + +static inline void +nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it) +{ + it->flush = min(it->flush, it->max - it->lvl); +} + +static inline void +nvkm_vmm_flush(struct nvkm_vmm_iter *it) +{ + if (it->flush != NVKM_VMM_LEVELS_MAX) { + if (it->vmm->func->flush) { + TRA(it, "flush: %d", it->flush); + it->vmm->func->flush(it->vmm, it->flush); + } + it->flush = NVKM_VMM_LEVELS_MAX; + } +} + +static void +nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it) +{ + const struct nvkm_vmm_desc *desc = it->desc; + const int type = desc[it->lvl].type == SPT; + struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1]; + struct nvkm_vmm_pt *pgt = it->pt[it->lvl]; + struct nvkm_mmu_pt *pt = pgt->pt[type]; + struct nvkm_vmm *vmm = it->vmm; + u32 pdei = it->pte[it->lvl + 1]; + + /* Recurse up the tree, unreferencing/destroying unneeded PDs. */ + it->lvl++; + if (--pgd->refs[0]) { + const struct nvkm_vmm_desc_func *func = desc[it->lvl].func; + /* PD has other valid PDEs, so we need a proper update. */ + TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1])); + pgt->pt[type] = NULL; + if (!pgt->refs[!type]) { + /* PDE no longer required. 
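Update the parent PD so it no longer points at this PT.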
*/ + if (pgd->pt[0]) { + if (pgt->sparse) { + func->sparse(vmm, pgd->pt[0], pdei, 1); + pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE; + } else { + func->unmap(vmm, pgd->pt[0], pdei, 1); + pgd->pde[pdei] = NULL; + } + } else { + /* Special handling for Tesla-class GPUs, + * where there's no central PD, but each + * instance has its own embedded PD. + */ + func->pde(vmm, pgd, pdei); + pgd->pde[pdei] = NULL; + } + } else { + /* PDE was pointing at dual-PTs and we're removing + * one of them, leaving the other in place. + */ + func->pde(vmm, pgd, pdei); + } + + /* GPU may have cached the PTs, flush before freeing. */ + nvkm_vmm_flush_mark(it); + nvkm_vmm_flush(it); + } else { + /* PD has no valid PDEs left, so we can just destroy it. */ + nvkm_vmm_unref_pdes(it); + } + + /* Destroy PD/PT. */ + TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1])); + nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt); + if (!pgt->refs[!type]) + nvkm_vmm_pt_del(&pgt); + it->lvl--; +} + +static void +nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, + const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes) +{ + const struct nvkm_vmm_desc *pair = it->page[-1].desc; + const u32 sptb = desc->bits - pair->bits; + const u32 sptn = 1 << sptb; + struct nvkm_vmm *vmm = it->vmm; + u32 spti = ptei & (sptn - 1), lpti, pteb; + + /* Determine how many SPTEs are being touched under each LPTE, + * and drop reference counts. + */ + for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) { + const u32 pten = min(sptn - spti, ptes); + pgt->pte[lpti] -= pten; + ptes -= pten; + } + + /* We're done here if there's no corresponding LPT. */ + if (!pgt->refs[0]) + return; + + for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) { + /* Skip over any LPTEs that still have valid SPTEs. */ + if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) { + for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { + if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)) + break; + } + continue; + } + + /* As there's no more non-UNMAPPED SPTEs left in the range + * covered by a number of LPTEs, the LPTEs once again take + * control over their address range. + * + * Determine how many LPTEs need to transition state. + */ + pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID; + for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { + if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES) + break; + pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID; + } + + if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) { + TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes); + pair->func->sparse(vmm, pgt->pt[0], pteb, ptes); + } else + if (pair->func->invalid) { + /* If the MMU supports it, restore the LPTE to the + * INVALID state to tell the MMU there is no point + * trying to fetch the corresponding SPTEs. + */ + TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes); + pair->func->invalid(vmm, pgt->pt[0], pteb, ptes); + } + } +} + +static bool +nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + const struct nvkm_vmm_desc *desc = it->desc; + const int type = desc->type == SPT; + struct nvkm_vmm_pt *pgt = it->pt[0]; + + /* Drop PTE references. */ + pgt->refs[type] -= ptes; + + /* Dual-PTs need special handling, unless PDE becoming invalid. */ + if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1])) + nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes); + + /* PT no longer neeed? Destroy it. */ + if (!pgt->refs[type]) { + it->lvl++; + TRA(it, "%s empty", nvkm_vmm_desc_type(desc)); + it->lvl--; + nvkm_vmm_unref_pdes(it); + return false; /* PTE writes for unmap() not necessary. 
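The page table itself was freed above.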
*/ + } + + return true; +} + +static void +nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, + const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes) +{ + const struct nvkm_vmm_desc *pair = it->page[-1].desc; + const u32 sptb = desc->bits - pair->bits; + const u32 sptn = 1 << sptb; + struct nvkm_vmm *vmm = it->vmm; + u32 spti = ptei & (sptn - 1), lpti, pteb; + + /* Determine how many SPTEs are being touched under each LPTE, + * and increase reference counts. + */ + for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) { + const u32 pten = min(sptn - spti, ptes); + pgt->pte[lpti] += pten; + ptes -= pten; + } + + /* We're done here if there's no corresponding LPT. */ + if (!pgt->refs[0]) + return; + + for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) { + /* Skip over any LPTEs that already have valid SPTEs. */ + if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) { + for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { + if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID)) + break; + } + continue; + } + + /* As there are now non-UNMAPPED SPTEs in the range covered + * by a number of LPTEs, we need to transfer control of the + * address range to the SPTEs. + * + * Determine how many LPTEs need to transition state. + */ + pgt->pte[ptei] |= NVKM_VMM_PTE_VALID; + for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { + if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID) + break; + pgt->pte[ptei] |= NVKM_VMM_PTE_VALID; + } + + if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) { + const u32 spti = pteb * sptn; + const u32 sptc = ptes * sptn; + /* The entire LPTE is marked as sparse, we need + * to make sure that the SPTEs are too. + */ + TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc); + desc->func->sparse(vmm, pgt->pt[1], spti, sptc); + /* Sparse LPTEs prevent SPTEs from being accessed. */ + TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes); + pair->func->unmap(vmm, pgt->pt[0], pteb, ptes); + } else + if (pair->func->invalid) { + /* MMU supports blocking SPTEs by marking an LPTE + * as INVALID. We need to reverse that here. + */ + TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes); + pair->func->unmap(vmm, pgt->pt[0], pteb, ptes); + } + } +} + +static bool +nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + const struct nvkm_vmm_desc *desc = it->desc; + const int type = desc->type == SPT; + struct nvkm_vmm_pt *pgt = it->pt[0]; + + /* Take PTE references. */ + pgt->refs[type] += ptes; + + /* Dual-PTs need special handling. 
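Referencing SPTEs may flip the covering LPTEs from sparse/invalid to unmapped.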
*/ + if (desc->type == SPT) + nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes); + + return true; +} + +static void +nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc, + struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes) +{ + if (desc->type == PGD) { + while (ptes--) + pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE; + } else + if (desc->type == LPT) { + memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes); + } +} + +static bool +nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + struct nvkm_vmm_pt *pt = it->pt[0]; + if (it->desc->type == PGD) + memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes); + else + if (it->desc->type == LPT) + memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes); + return nvkm_vmm_unref_ptes(it, ptei, ptes); +} + +static bool +nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes); + return nvkm_vmm_ref_ptes(it, ptei, ptes); +} + +static bool +nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1]; + const int type = desc->type == SPT; + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + const bool zero = !pgt->sparse && !desc->func->invalid; + struct nvkm_vmm *vmm = it->vmm; + struct nvkm_mmu *mmu = vmm->mmu; + struct nvkm_mmu_pt *pt; + u32 pten = 1 << desc->bits; + u32 pteb, ptei, ptes; + u32 size = desc->size * pten; + + pgd->refs[0]++; + + pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero); + if (!pgt->pt[type]) { + it->lvl--; + nvkm_vmm_unref_pdes(it); + return false; + } + + if (zero) + goto done; + + pt = pgt->pt[type]; + + if (desc->type == LPT && pgt->refs[1]) { + /* SPT already exists covering the same range as this LPT, + * which means we need to be careful that any LPTEs which + * overlap valid SPTEs are unmapped as opposed to invalid + * or sparse, which would prevent the MMU from looking at + * the SPTEs on some GPUs. 
+ */ + for (ptei = pteb = 0; ptei < pten; pteb = ptei) { + bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES; + for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) { + bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES; + if (spte != next) + break; + } + + if (!spte) { + if (pgt->sparse) + desc->func->sparse(vmm, pt, pteb, ptes); + else + desc->func->invalid(vmm, pt, pteb, ptes); + memset(&pgt->pte[pteb], 0x00, ptes); + } else { + desc->func->unmap(vmm, pt, pteb, ptes); + while (ptes--) + pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID; + } + } + } else { + if (pgt->sparse) { + nvkm_vmm_sparse_ptes(desc, pgt, 0, pten); + desc->func->sparse(vmm, pt, 0, pten); + } else { + desc->func->invalid(vmm, pt, 0, pten); + } + } + +done: + TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc)); + it->desc[it->lvl].func->pde(it->vmm, pgd, pdei); + nvkm_vmm_flush_mark(it); + return true; +} + +static bool +nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1]; + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + + pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page); + if (!pgt) { + if (!pgd->refs[0]) + nvkm_vmm_unref_pdes(it); + return false; + } + + pgd->pde[pdei] = pgt; + return true; +} + +static inline u64 +nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, const char *name, bool ref, + bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32), + nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map, + nvkm_vmm_pxe_func CLR_PTES) +{ + const struct nvkm_vmm_desc *desc = page->desc; + struct nvkm_vmm_iter it; + u64 bits = addr >> page->shift; + + it.page = page; + it.desc = desc; + it.vmm = vmm; + it.cnt = size >> page->shift; + it.flush = NVKM_VMM_LEVELS_MAX; + + /* Deconstruct address into PTE indices for each mapping level. */ + for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) { + it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1); + bits >>= desc[it.lvl].bits; + } + it.max = --it.lvl; + it.pt[it.max] = vmm->pd; + + it.lvl = 0; + TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name, + addr, size, page->shift, it.cnt); + it.lvl = it.max; + + /* Depth-first traversal of page tables. */ + while (it.cnt) { + struct nvkm_vmm_pt *pgt = it.pt[it.lvl]; + const int type = desc->type == SPT; + const u32 pten = 1 << desc->bits; + const u32 ptei = it.pte[0]; + const u32 ptes = min_t(u64, it.cnt, pten - ptei); + + /* Walk down the tree, finding page tables for each level. */ + for (; it.lvl; it.lvl--) { + const u32 pdei = it.pte[it.lvl]; + struct nvkm_vmm_pt *pgd = pgt; + + /* Software PT. */ + if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) { + if (!nvkm_vmm_ref_swpt(&it, pgd, pdei)) + goto fail; + } + it.pt[it.lvl - 1] = pgt = pgd->pde[pdei]; + + /* Hardware PT. + * + * This is a separate step from above due to GF100 and + * newer having dual page tables at some levels, which + * are refcounted independently. + */ + if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) { + if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei)) + goto fail; + } + } + + /* Handle PTE updates. */ + if (!REF_PTES || REF_PTES(&it, ptei, ptes)) { + struct nvkm_mmu_pt *pt = pgt->pt[type]; + if (MAP_PTES || CLR_PTES) { + if (MAP_PTES) + MAP_PTES(vmm, pt, ptei, ptes, map); + else + CLR_PTES(vmm, pt, ptei, ptes); + nvkm_vmm_flush_mark(&it); + } + } + + /* Walk back up the tree to the next position. 
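PTE index overflow is carried into the parent levels.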
*/ + it.pte[it.lvl] += ptes; + it.cnt -= ptes; + if (it.cnt) { + while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) { + it.pte[it.lvl++] = 0; + it.pte[it.lvl]++; + } + } + }; + + nvkm_vmm_flush(&it); + return ~0ULL; + +fail: + /* Reconstruct the failure address so the caller is able to + * reverse any partially completed operations. + */ + addr = it.pte[it.max--]; + do { + addr = addr << desc[it.max].bits; + addr |= it.pte[it.max]; + } while (it.max--); + + return addr << page->shift; +} + +static void +nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false, + nvkm_vmm_sparse_unref_ptes, NULL, NULL, + page->desc->func->invalid ? + page->desc->func->invalid : page->desc->func->unmap); +} + +static int +nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + if ((page->type & NVKM_VMM_PAGE_SPARSE)) { + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref", + true, nvkm_vmm_sparse_ref_ptes, NULL, + NULL, page->desc->func->sparse); + if (fail != ~0ULL) { + if ((size = fail - addr)) + nvkm_vmm_ptes_sparse_put(vmm, page, addr, size); + return -ENOMEM; + } + return 0; + } + return -EINVAL; +} + +static int +nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + int m = 0, i; + u64 start = addr; + u64 block; + + while (size) { + /* Limit maximum page size based on remaining size. */ + while (size < (1ULL << page[m].shift)) + m++; + i = m; + + /* Find largest page size suitable for alignment. */ + while (!IS_ALIGNED(addr, 1ULL << page[i].shift)) + i++; + + /* Determine number of PTEs at this page size. */ + if (i != m) { + /* Limited to alignment boundary of next page size. */ + u64 next = 1ULL << page[i - 1].shift; + u64 part = ALIGN(addr, next) - addr; + if (size - part >= next) + block = (part >> page[i].shift) << page[i].shift; + else + block = (size >> page[i].shift) << page[i].shift; + } else { + block = (size >> page[i].shift) << page[i].shift;; + } + + /* Perform operation. */ + if (ref) { + int ret = nvkm_vmm_ptes_sparse_get(vmm, &page[i], addr, block); + if (ret) { + if ((size = addr - start)) + nvkm_vmm_ptes_sparse(vmm, start, size, false); + return ret; + } + } else { + nvkm_vmm_ptes_sparse_put(vmm, &page[i], addr, block); + } + + size -= block; + addr += block; + } + + return 0; +} + +static void +nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, bool sparse) +{ + const struct nvkm_vmm_desc_func *func = page->desc->func; + nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref", + false, nvkm_vmm_unref_ptes, NULL, NULL, + sparse ? func->sparse : func->invalid ? func->invalid : + func->unmap); +} + +static int +nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, struct nvkm_vmm_map *map, + nvkm_vmm_pte_func func) +{ + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true, + nvkm_vmm_ref_ptes, func, map, NULL); + if (fail != ~0ULL) { + if ((size = fail - addr)) + nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false); + return -ENOMEM; + } + return 0; +} + +static void +nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, bool sparse) +{ + const struct nvkm_vmm_desc_func *func = page->desc->func; + nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL, + sparse ? 
func->sparse : func->invalid ? func->invalid : + func->unmap); +} + +static void +nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size, struct nvkm_vmm_map *map, + nvkm_vmm_pte_func func) +{ + nvkm_vmm_iter(vmm, page, addr, size, "map", false, + NULL, func, map, NULL); +} + +static void +nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + nvkm_vmm_iter(vmm, page, addr, size, "unref", false, + nvkm_vmm_unref_ptes, NULL, NULL, NULL); +} + +static int +nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, + u64 addr, u64 size) +{ + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, + nvkm_vmm_ref_ptes, NULL, NULL, NULL); + if (fail != ~0ULL) { + if (fail != addr) + nvkm_vmm_ptes_put(vmm, page, addr, fail - addr); + return -ENOMEM; + } + return 0; +} + +static inline struct nvkm_vma * +nvkm_vma_new(u64 addr, u64 size) +{ + struct nvkm_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (vma) { + vma->addr = addr; + vma->size = size; + vma->page = NVKM_VMA_PAGE_NONE; + vma->refd = NVKM_VMA_PAGE_NONE; + } + return vma; +} + +struct nvkm_vma * +nvkm_vma_tail(struct nvkm_vma *vma, u64 tail) +{ + struct nvkm_vma *new; + + BUG_ON(vma->size == tail); + + if (!(new = nvkm_vma_new(vma->addr + (vma->size - tail), tail))) + return NULL; + vma->size -= tail; + + new->mapref = vma->mapref; + new->sparse = vma->sparse; + new->page = vma->page; + new->refd = vma->refd; + new->used = vma->used; + new->part = vma->part; + new->user = vma->user; + new->busy = vma->busy; + list_add(&new->head, &vma->head); + return new; +} + +static void +nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct rb_node **ptr = &vmm->free.rb_node; + struct rb_node *parent = NULL; + + while (*ptr) { + struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree); + parent = *ptr; + if (vma->size < this->size) + ptr = &parent->rb_left; + else + if (vma->size > this->size) + ptr = &parent->rb_right; + else + if (vma->addr < this->addr) + ptr = &parent->rb_left; + else + if (vma->addr > this->addr) + ptr = &parent->rb_right; + else + BUG(); + } + + rb_link_node(&vma->tree, parent, ptr); + rb_insert_color(&vma->tree, &vmm->free); +} + +void +nvkm_vmm_node_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct rb_node **ptr = &vmm->root.rb_node; + struct rb_node *parent = NULL; + + while (*ptr) { + struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree); + parent = *ptr; + if (vma->addr < this->addr) + ptr = &parent->rb_left; + else + if (vma->addr > this->addr) + ptr = &parent->rb_right; + else + BUG(); + } + + rb_link_node(&vma->tree, parent, ptr); + rb_insert_color(&vma->tree, &vmm->root); +} + +struct nvkm_vma * +nvkm_vmm_node_search(struct nvkm_vmm *vmm, u64 addr) +{ + struct rb_node *node = vmm->root.rb_node; + while (node) { + struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); + if (addr < vma->addr) + node = node->rb_left; + else + if (addr >= vma->addr + vma->size) + node = node->rb_right; + else + return vma; + } + return NULL; +} + +static void +nvkm_vmm_dtor(struct nvkm_vmm *vmm) +{ + struct nvkm_vma *vma; + struct rb_node *node; + + while ((node = rb_first(&vmm->root))) { + struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); + nvkm_vmm_put(vmm, &vma); + } + + if (vmm->bootstrapped) { + const struct nvkm_vmm_page *page = vmm->func->page; + const u64 limit = vmm->limit - vmm->start; + + while (page[1].shift) + page++; + + nvkm_mmu_ptc_dump(vmm->mmu); + 
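+		/* Drop the PTE references taken when the VMM was bootstrapped. */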
nvkm_vmm_ptes_put(vmm, page, vmm->start, limit); + } + + vma = list_first_entry(&vmm->list, typeof(*vma), head); + list_del(&vma->head); + kfree(vma); + WARN_ON(!list_empty(&vmm->list)); + + if (vmm->nullp) { + dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024, + vmm->nullp, vmm->null); + } + + if (vmm->pd) { + nvkm_mmu_ptc_put(vmm->mmu, true, &vmm->pd->pt[0]); + nvkm_vmm_pt_del(&vmm->pd); + } +} + +int +nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, + u32 pd_header, u64 addr, u64 size, struct lock_class_key *key, + const char *name, struct nvkm_vmm *vmm) +{ + static struct lock_class_key _key; + const struct nvkm_vmm_page *page = func->page; + const struct nvkm_vmm_desc *desc; + struct nvkm_vma *vma; + int levels, bits = 0; + + vmm->func = func; + vmm->mmu = mmu; + vmm->name = name; + vmm->debug = mmu->subdev.debug; + kref_init(&vmm->kref); + + __mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key); + + /* Locate the smallest page size supported by the backend, it will + * have the the deepest nesting of page tables. + */ + while (page[1].shift) + page++; + + /* Locate the structure that describes the layout of the top-level + * page table, and determine the number of valid bits in a virtual + * address. + */ + for (levels = 0, desc = page->desc; desc->bits; desc++, levels++) + bits += desc->bits; + bits += page->shift; + desc--; + + if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX)) + return -EINVAL; + + vmm->start = addr; + vmm->limit = size ? (addr + size) : (1ULL << bits); + if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits)) + return -EINVAL; + + /* Allocate top-level page table. */ + vmm->pd = nvkm_vmm_pt_new(desc, false, NULL); + if (!vmm->pd) + return -ENOMEM; + vmm->pd->refs[0] = 1; + INIT_LIST_HEAD(&vmm->join); + + /* ... and the GPU storage for it, except on Tesla-class GPUs that + * have the PD embedded in the instance structure. + */ + if (desc->size) { + const u32 size = pd_header + desc->size * (1 << desc->bits); + vmm->pd->pt[0] = nvkm_mmu_ptc_get(mmu, size, desc->align, true); + if (!vmm->pd->pt[0]) + return -ENOMEM; + } + + /* Initialise address-space MM. */ + INIT_LIST_HEAD(&vmm->list); + vmm->free = RB_ROOT; + vmm->root = RB_ROOT; + + if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start))) + return -ENOMEM; + + nvkm_vmm_free_insert(vmm, vma); + list_add(&vma->head, &vmm->list); + return 0; +} + +int +nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, + u32 hdr, u64 addr, u64 size, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) +{ + if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL))) + return -ENOMEM; + return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm); +} + +#define node(root, dir) ((root)->head.dir == &vmm->list) ? 
NULL : \ + list_entry((root)->head.dir, struct nvkm_vma, head) + +void +nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct nvkm_vma *next; + + nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); + nvkm_memory_unref(&vma->memory); + + if (vma->part) { + struct nvkm_vma *prev = node(vma, prev); + if (!prev->memory) { + prev->size += vma->size; + rb_erase(&vma->tree, &vmm->root); + list_del(&vma->head); + kfree(vma); + vma = prev; + } + } + + next = node(vma, next); + if (next && next->part) { + if (!next->memory) { + vma->size += next->size; + rb_erase(&next->tree, &vmm->root); + list_del(&next->head); + kfree(next); + } + } +} + +void +nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd]; + + if (vma->mapref) { + nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse); + vma->refd = NVKM_VMA_PAGE_NONE; + } else { + nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse); + } + + nvkm_vmm_unmap_region(vmm, vma); +} + +void +nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + if (vma->memory) { + mutex_lock(&vmm->mutex); + nvkm_vmm_unmap_locked(vmm, vma); + mutex_unlock(&vmm->mutex); + } +} + +static int +nvkm_vmm_map_valid(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + switch (nvkm_memory_target(map->memory)) { + case NVKM_MEM_TARGET_VRAM: + if (!(map->page->type & NVKM_VMM_PAGE_VRAM)) { + VMM_DEBUG(vmm, "%d !VRAM", map->page->shift); + return -EINVAL; + } + break; + case NVKM_MEM_TARGET_HOST: + case NVKM_MEM_TARGET_NCOH: + if (!(map->page->type & NVKM_VMM_PAGE_HOST)) { + VMM_DEBUG(vmm, "%d !HOST", map->page->shift); + return -EINVAL; + } + break; + default: + WARN_ON(1); + return -ENOSYS; + } + + if (!IS_ALIGNED( vma->addr, 1ULL << map->page->shift) || + !IS_ALIGNED((u64)vma->size, 1ULL << map->page->shift) || + !IS_ALIGNED( map->offset, 1ULL << map->page->shift) || + nvkm_memory_page(map->memory) < map->page->shift) { + VMM_DEBUG(vmm, "alignment %016llx %016llx %016llx %d %d", + vma->addr, (u64)vma->size, map->offset, map->page->shift, + nvkm_memory_page(map->memory)); + return -EINVAL; + } + + return vmm->func->valid(vmm, argv, argc, map); +} + +static int +nvkm_vmm_map_choose(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + for (map->page = vmm->func->page; map->page->shift; map->page++) { + VMM_DEBUG(vmm, "trying %d", map->page->shift); + if (!nvkm_vmm_map_valid(vmm, vma, argv, argc, map)) + return 0; + } + return -EINVAL; +} + +static int +nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + void *argv, u32 argc, struct nvkm_vmm_map *map) +{ + nvkm_vmm_pte_func func; + int ret; + + /* Make sure we won't overrun the end of the memory object. */ + if (unlikely(nvkm_memory_size(map->memory) < map->offset + vma->size)) { + VMM_DEBUG(vmm, "overrun %016llx %016llx %016llx", + nvkm_memory_size(map->memory), + map->offset, (u64)vma->size); + return -EINVAL; + } + + /* Check remaining arguments for validity. */ + if (vma->page == NVKM_VMA_PAGE_NONE && + vma->refd == NVKM_VMA_PAGE_NONE) { + /* Find the largest page size we can perform the mapping at. 
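Debug output is silenced while each size is probed; on failure the probe is repeated with it restored so the reason is logged.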
*/ + const u32 debug = vmm->debug; + vmm->debug = 0; + ret = nvkm_vmm_map_choose(vmm, vma, argv, argc, map); + vmm->debug = debug; + if (ret) { + VMM_DEBUG(vmm, "invalid at any page size"); + nvkm_vmm_map_choose(vmm, vma, argv, argc, map); + return -EINVAL; + } + } else { + /* Page size of the VMA is already pre-determined. */ + if (vma->refd != NVKM_VMA_PAGE_NONE) + map->page = &vmm->func->page[vma->refd]; + else + map->page = &vmm->func->page[vma->page]; + + ret = nvkm_vmm_map_valid(vmm, vma, argv, argc, map); + if (ret) { + VMM_DEBUG(vmm, "invalid %d\n", ret); + return ret; + } + } + + /* Deal with the 'offset' argument, and fetch the backend function. */ + map->off = map->offset; + if (map->mem) { + for (; map->off; map->mem = map->mem->next) { + u64 size = (u64)map->mem->length << NVKM_RAM_MM_SHIFT; + if (size > map->off) + break; + map->off -= size; + } + func = map->page->desc->func->mem; + } else + if (map->sgl) { + for (; map->off; map->sgl = sg_next(map->sgl)) { + u64 size = sg_dma_len(map->sgl); + if (size > map->off) + break; + map->off -= size; + } + func = map->page->desc->func->sgl; + } else { + map->dma += map->offset >> PAGE_SHIFT; + map->off = map->offset & PAGE_MASK; + func = map->page->desc->func->dma; + } + + /* Perform the map. */ + if (vma->refd == NVKM_VMA_PAGE_NONE) { + ret = nvkm_vmm_ptes_get_map(vmm, map->page, vma->addr, vma->size, map, func); + if (ret) + return ret; + + vma->refd = map->page - vmm->func->page; + } else { + nvkm_vmm_ptes_map(vmm, map->page, vma->addr, vma->size, map, func); + } + + nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); + nvkm_memory_unref(&vma->memory); + vma->memory = nvkm_memory_ref(map->memory); + vma->tags = map->tags; + return 0; +} + +int +nvkm_vmm_map(struct nvkm_vmm *vmm, struct nvkm_vma *vma, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + int ret; + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_map_locked(vmm, vma, argv, argc, map); + vma->busy = false; + mutex_unlock(&vmm->mutex); + return ret; +} + +static void +nvkm_vmm_put_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + struct nvkm_vma *prev, *next; + + if ((prev = node(vma, prev)) && !prev->used) { + rb_erase(&prev->tree, &vmm->free); + list_del(&prev->head); + vma->addr = prev->addr; + vma->size += prev->size; + kfree(prev); + } + + if ((next = node(vma, next)) && !next->used) { + rb_erase(&next->tree, &vmm->free); + list_del(&next->head); + vma->size += next->size; + kfree(next); + } + + nvkm_vmm_free_insert(vmm, vma); +} + +void +nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + struct nvkm_vma *next = vma; + + BUG_ON(vma->part); + + if (vma->mapref || !vma->sparse) { + do { + const bool map = next->memory != NULL; + const u8 refd = next->refd; + const u64 addr = next->addr; + u64 size = next->size; + + /* Merge regions that are in the same state. */ + while ((next = node(next, next)) && next->part && + (next->memory != NULL) == map && + (next->refd == refd)) + size += next->size; + + if (map) { + /* Region(s) are mapped, merge the unmap + * and dereference into a single walk of + * the page tree. + */ + nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr, + size, vma->sparse); + } else + if (refd != NVKM_VMA_PAGE_NONE) { + /* Drop allocation-time PTE references. 
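These were taken up-front when the region was allocated with a fixed page size.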
*/ + nvkm_vmm_ptes_put(vmm, &page[refd], addr, size); + } + } while (next && next->part); + } + + /* Merge any mapped regions that were split from the initial + * address-space allocation back into the allocated VMA, and + * release memory/compression resources. + */ + next = vma; + do { + if (next->memory) + nvkm_vmm_unmap_region(vmm, next); + } while ((next = node(vma, next)) && next->part); + + if (vma->sparse && !vma->mapref) { + /* Sparse region that was allocated with a fixed page size, + * meaning all relevant PTEs were referenced once when the + * region was allocated, and remained that way, regardless + * of whether memory was mapped into it afterwards. + * + * The process of unmapping, unsparsing, and dereferencing + * PTEs can be done in a single page tree walk. + */ + nvkm_vmm_ptes_sparse_put(vmm, &page[vma->refd], vma->addr, vma->size); + } else + if (vma->sparse) { + /* Sparse region that wasn't allocated with a fixed page size, + * PTE references were taken both at allocation time (to make + * the GPU see the region as sparse), and when mapping memory + * into the region. + * + * The latter was handled above, and the remaining references + * are dealt with here. + */ + nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, false); + } + + /* Remove VMA from the list of allocated nodes. */ + rb_erase(&vma->tree, &vmm->root); + + /* Merge VMA back into the free list. */ + vma->page = NVKM_VMA_PAGE_NONE; + vma->refd = NVKM_VMA_PAGE_NONE; + vma->used = false; + vma->user = false; + nvkm_vmm_put_region(vmm, vma); +} + +void +nvkm_vmm_put(struct nvkm_vmm *vmm, struct nvkm_vma **pvma) +{ + struct nvkm_vma *vma = *pvma; + if (vma) { + mutex_lock(&vmm->mutex); + nvkm_vmm_put_locked(vmm, vma); + mutex_unlock(&vmm->mutex); + *pvma = NULL; + } +} + +int +nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse, + u8 shift, u8 align, u64 size, struct nvkm_vma **pvma) +{ + const struct nvkm_vmm_page *page = &vmm->func->page[NVKM_VMA_PAGE_NONE]; + struct rb_node *node = NULL, *temp; + struct nvkm_vma *vma = NULL, *tmp; + u64 addr, tail; + int ret; + + VMM_TRACE(vmm, "getref %d mapref %d sparse %d " + "shift: %d align: %d size: %016llx", + getref, mapref, sparse, shift, align, size); + + /* Zero-sized, or lazily-allocated sparse VMAs, make no sense. */ + if (unlikely(!size || (!getref && !mapref && sparse))) { + VMM_DEBUG(vmm, "args %016llx %d %d %d", + size, getref, mapref, sparse); + return -EINVAL; + } + + /* Tesla-class GPUs can only select page size per-PDE, which means + * we're required to know the mapping granularity up-front to find + * a suitable region of address-space. + * + * The same goes if we're requesting up-front allocation of PTES. + */ + if (unlikely((getref || vmm->func->page_block) && !shift)) { + VMM_DEBUG(vmm, "page size required: %d %016llx", + getref, vmm->func->page_block); + return -EINVAL; + } + + /* If a specific page size was requested, determine its index and + * make sure the requested size is a multiple of the page size. + */ + if (shift) { + for (page = vmm->func->page; page->shift; page++) { + if (shift == page->shift) + break; + } + + if (!page->shift || !IS_ALIGNED(size, 1ULL << page->shift)) { + VMM_DEBUG(vmm, "page %d %016llx", shift, size); + return -EINVAL; + } + align = max_t(u8, align, shift); + } else { + align = max_t(u8, align, 12); + } + + /* Locate smallest block that can possibly satisfy the allocation. 
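The free tree is ordered by size, then address, so this finds a best-fit candidate.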
*/ + temp = vmm->free.rb_node; + while (temp) { + struct nvkm_vma *this = rb_entry(temp, typeof(*this), tree); + if (this->size < size) { + temp = temp->rb_right; + } else { + node = temp; + temp = temp->rb_left; + } + } + + if (unlikely(!node)) + return -ENOSPC; + + /* Take into account alignment restrictions, trying larger blocks + * in turn until we find a suitable free block. + */ + do { + struct nvkm_vma *this = rb_entry(node, typeof(*this), tree); + struct nvkm_vma *prev = node(this, prev); + struct nvkm_vma *next = node(this, next); + const int p = page - vmm->func->page; + + addr = this->addr; + if (vmm->func->page_block && prev && prev->page != p) + addr = ALIGN(addr, vmm->func->page_block); + addr = ALIGN(addr, 1ULL << align); + + tail = this->addr + this->size; + if (vmm->func->page_block && next && next->page != p) + tail = ALIGN_DOWN(addr, vmm->func->page_block); + + if (addr <= tail && tail - addr >= size) { + rb_erase(&this->tree, &vmm->free); + vma = this; + break; + } + } while ((node = rb_next(node))); + + if (unlikely(!vma)) + return -ENOSPC; + + /* If the VMA we found isn't already exactly the requested size, + * it needs to be split, and the remaining free blocks returned. + */ + if (addr != vma->addr) { + if (!(tmp = nvkm_vma_tail(vma, vma->size + vma->addr - addr))) { + nvkm_vmm_put_region(vmm, vma); + return -ENOMEM; + } + nvkm_vmm_free_insert(vmm, vma); + vma = tmp; + } + + if (size != vma->size) { + if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) { + nvkm_vmm_put_region(vmm, vma); + return -ENOMEM; + } + nvkm_vmm_free_insert(vmm, tmp); + } + + /* Pre-allocate page tables and/or setup sparse mappings. */ + if (sparse && getref) + ret = nvkm_vmm_ptes_sparse_get(vmm, page, vma->addr, vma->size); + else if (sparse) + ret = nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, true); + else if (getref) + ret = nvkm_vmm_ptes_get(vmm, page, vma->addr, vma->size); + else + ret = 0; + if (ret) { + nvkm_vmm_put_region(vmm, vma); + return ret; + } + + vma->mapref = mapref && !getref; + vma->sparse = sparse; + vma->page = page - vmm->func->page; + vma->refd = getref ? 
vma->page : NVKM_VMA_PAGE_NONE; + vma->used = true; + nvkm_vmm_node_insert(vmm, vma); + *pvma = vma; + return 0; +} + +int +nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma) +{ + int ret; + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_get_locked(vmm, false, true, false, page, 0, size, pvma); + mutex_unlock(&vmm->mutex); + return ret; +} + +void +nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + if (vmm->func->part && inst) { + mutex_lock(&vmm->mutex); + vmm->func->part(vmm, inst); + mutex_unlock(&vmm->mutex); + } +} + +int +nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + int ret = 0; + if (vmm->func->join) { + mutex_lock(&vmm->mutex); + ret = vmm->func->join(vmm, inst); + mutex_unlock(&vmm->mutex); + } + return ret; +} + +static bool +nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +{ + const struct nvkm_vmm_desc *desc = it->desc; + const int type = desc->type == SPT; + nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm); + return false; +} + +int +nvkm_vmm_boot(struct nvkm_vmm *vmm) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + const u64 limit = vmm->limit - vmm->start; + int ret; + + while (page[1].shift) + page++; + + ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit); + if (ret) + return ret; + + nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false, + nvkm_vmm_boot_ptes, NULL, NULL, NULL); + vmm->bootstrapped = true; + return 0; +} + +static void +nvkm_vmm_del(struct kref *kref) +{ + struct nvkm_vmm *vmm = container_of(kref, typeof(*vmm), kref); + nvkm_vmm_dtor(vmm); + kfree(vmm); +} + +void +nvkm_vmm_unref(struct nvkm_vmm **pvmm) +{ + struct nvkm_vmm *vmm = *pvmm; + if (vmm) { + kref_put(&vmm->kref, nvkm_vmm_del); + *pvmm = NULL; + } +} + +struct nvkm_vmm * +nvkm_vmm_ref(struct nvkm_vmm *vmm) +{ + if (vmm) + kref_get(&vmm->kref); + return vmm; +} + +int +nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv, + u32 argc, struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + struct nvkm_mmu *mmu = device->mmu; + struct nvkm_vmm *vmm = NULL; + int ret; + ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, key, name, &vmm); + if (ret) + nvkm_vmm_unref(&vmm); + *pvmm = vmm; + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h new file mode 100644 index 000000000000..6d8f61ea467a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -0,0 +1,310 @@ +#ifndef __NVKM_VMM_H__ +#define __NVKM_VMM_H__ +#include "priv.h" +#include <core/memory.h> +enum nvkm_memory_target; + +struct nvkm_vmm_pt { + /* Some GPUs have a mapping level with dual page tables to + * support large and small pages in the same address-range. + * + * We track the state of both page tables in one place, which + * is why there are multiple PT pointers/refcounts here. + */ + struct nvkm_mmu_pt *pt[2]; + u32 refs[2]; + + /* Page size handled by this PT. + * + * Tesla backend needs to know this when writing PDEs, + * otherwise unnecessary. + */ + u8 page; + + /* Entire page table sparse. + * + * Used to propagate sparseness to child page tables. + */ + bool sparse:1; + + /* Tracking for page directories. + * + * The array is indexed by PDE, and will either point to the + * child page table, or indicate the PDE is marked as sparse.
+ **/ +#define NVKM_VMM_PDE_INVALID(pde) IS_ERR_OR_NULL(pde) +#define NVKM_VMM_PDE_SPARSED(pde) IS_ERR(pde) +#define NVKM_VMM_PDE_SPARSE ERR_PTR(-EBUSY) + struct nvkm_vmm_pt **pde; + + /* Tracking for dual page tables. + * + * There's one entry for each LPTE, keeping track of whether + * there are valid SPTEs in the same address-range. + * + * This information is used to manage LPTE state transitions. + */ +#define NVKM_VMM_PTE_SPARSE 0x80 +#define NVKM_VMM_PTE_VALID 0x40 +#define NVKM_VMM_PTE_SPTES 0x3f + u8 pte[]; +}; + +typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *, + struct nvkm_mmu_pt *, u32 ptei, u32 ptes); +typedef void (*nvkm_vmm_pde_func)(struct nvkm_vmm *, + struct nvkm_vmm_pt *, u32 pdei); +typedef void (*nvkm_vmm_pte_func)(struct nvkm_vmm *, struct nvkm_mmu_pt *, + u32 ptei, u32 ptes, struct nvkm_vmm_map *); + +struct nvkm_vmm_desc_func { + nvkm_vmm_pxe_func invalid; + nvkm_vmm_pxe_func unmap; + nvkm_vmm_pxe_func sparse; + + nvkm_vmm_pde_func pde; + + nvkm_vmm_pte_func mem; + nvkm_vmm_pte_func dma; + nvkm_vmm_pte_func sgl; +}; + +extern const struct nvkm_vmm_desc_func gf100_vmm_pgd; +void gf100_vmm_pgd_pde(struct nvkm_vmm *, struct nvkm_vmm_pt *, u32); +extern const struct nvkm_vmm_desc_func gf100_vmm_pgt; +void gf100_vmm_pgt_unmap(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32); +void gf100_vmm_pgt_mem(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32, + struct nvkm_vmm_map *); +void gf100_vmm_pgt_dma(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32, + struct nvkm_vmm_map *); +void gf100_vmm_pgt_sgl(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32, + struct nvkm_vmm_map *); + +void gk104_vmm_lpt_invalid(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32, u32); + +struct nvkm_vmm_desc { + enum { + PGD, + PGT, + SPT, + LPT, + } type; + u8 bits; /* VMA bits covered by PT. */ + u8 size; /* Bytes-per-PTE. */ + u32 align; /* PT address alignment. 
*/ + const struct nvkm_vmm_desc_func *func; +}; + +extern const struct nvkm_vmm_desc gk104_vmm_desc_16_12[]; +extern const struct nvkm_vmm_desc gk104_vmm_desc_16_16[]; +extern const struct nvkm_vmm_desc gk104_vmm_desc_17_12[]; +extern const struct nvkm_vmm_desc gk104_vmm_desc_17_17[]; + +extern const struct nvkm_vmm_desc gm200_vmm_desc_16_12[]; +extern const struct nvkm_vmm_desc gm200_vmm_desc_16_16[]; +extern const struct nvkm_vmm_desc gm200_vmm_desc_17_12[]; +extern const struct nvkm_vmm_desc gm200_vmm_desc_17_17[]; + +extern const struct nvkm_vmm_desc gp100_vmm_desc_12[]; +extern const struct nvkm_vmm_desc gp100_vmm_desc_16[]; + +struct nvkm_vmm_page { + u8 shift; + const struct nvkm_vmm_desc *desc; +#define NVKM_VMM_PAGE_SPARSE 0x01 +#define NVKM_VMM_PAGE_VRAM 0x02 +#define NVKM_VMM_PAGE_HOST 0x04 +#define NVKM_VMM_PAGE_COMP 0x08 +#define NVKM_VMM_PAGE_Sxxx (NVKM_VMM_PAGE_SPARSE) +#define NVKM_VMM_PAGE_xVxx (NVKM_VMM_PAGE_VRAM) +#define NVKM_VMM_PAGE_SVxx (NVKM_VMM_PAGE_Sxxx | NVKM_VMM_PAGE_VRAM) +#define NVKM_VMM_PAGE_xxHx (NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_SxHx (NVKM_VMM_PAGE_Sxxx | NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_xVHx (NVKM_VMM_PAGE_xVxx | NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_SVHx (NVKM_VMM_PAGE_SVxx | NVKM_VMM_PAGE_HOST) +#define NVKM_VMM_PAGE_xVxC (NVKM_VMM_PAGE_xVxx | NVKM_VMM_PAGE_COMP) +#define NVKM_VMM_PAGE_SVxC (NVKM_VMM_PAGE_SVxx | NVKM_VMM_PAGE_COMP) +#define NVKM_VMM_PAGE_xxHC (NVKM_VMM_PAGE_xxHx | NVKM_VMM_PAGE_COMP) +#define NVKM_VMM_PAGE_SxHC (NVKM_VMM_PAGE_SxHx | NVKM_VMM_PAGE_COMP) + u8 type; +}; + +struct nvkm_vmm_func { + int (*join)(struct nvkm_vmm *, struct nvkm_memory *inst); + void (*part)(struct nvkm_vmm *, struct nvkm_memory *inst); + + int (*aper)(enum nvkm_memory_target); + int (*valid)(struct nvkm_vmm *, void *argv, u32 argc, + struct nvkm_vmm_map *); + void (*flush)(struct nvkm_vmm *, int depth); + + u64 page_block; + const struct nvkm_vmm_page page[]; +}; + +struct nvkm_vmm_join { + struct nvkm_memory *inst; + struct list_head head; +}; + +int nvkm_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, + u32 pd_header, u64 addr, u64 size, struct lock_class_key *, + const char *name, struct nvkm_vmm **); +int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *, + u32 pd_header, u64 addr, u64 size, struct lock_class_key *, + const char *name, struct nvkm_vmm *); +struct nvkm_vma *nvkm_vmm_node_search(struct nvkm_vmm *, u64 addr); +int nvkm_vmm_get_locked(struct nvkm_vmm *, bool getref, bool mapref, + bool sparse, u8 page, u8 align, u64 size, + struct nvkm_vma **pvma); +void nvkm_vmm_put_locked(struct nvkm_vmm *, struct nvkm_vma *); +void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *); +void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma); + +struct nvkm_vma *nvkm_vma_tail(struct nvkm_vma *, u64 tail); +void nvkm_vmm_node_insert(struct nvkm_vmm *, struct nvkm_vma *); + +int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32, + u64, u64, void *, u32, struct lock_class_key *, + const char *, struct nvkm_vmm **); +int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); + +int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, + struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gf100_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); +int gf100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +void gf100_vmm_part(struct nvkm_vmm *, struct nvkm_memory *); +int 
gf100_vmm_aper(enum nvkm_memory_target); +int gf100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +void gf100_vmm_flush_(struct nvkm_vmm *, int); +void gf100_vmm_flush(struct nvkm_vmm *, int); + +int gk20a_vmm_aper(enum nvkm_memory_target); + +int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, + struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gm200_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); +int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); + +int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +void gp100_vmm_flush(struct nvkm_vmm *, int); + +int nv04_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int nv41_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int g84_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gk104_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gk20a_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); +int gm200_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gm200_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gm20b_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gm20b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gp100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); +int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); + +#define VMM_PRINT(l,v,p,f,a...) do { \ + struct nvkm_vmm *_vmm = (v); \ + if (CONFIG_NOUVEAU_DEBUG >= (l) && _vmm->debug >= (l)) { \ + nvkm_printk_(&_vmm->mmu->subdev, 0, p, "%s: "f"\n", \ + _vmm->name, ##a); \ + } \ +} while(0) +#define VMM_DEBUG(v,f,a...) VMM_PRINT(NV_DBG_DEBUG, (v), info, f, ##a) +#define VMM_TRACE(v,f,a...) VMM_PRINT(NV_DBG_TRACE, (v), info, f, ##a) +#define VMM_SPAM(v,f,a...) 
VMM_PRINT(NV_DBG_SPAM , (v), dbg, f, ##a) + +#define VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL,BASE,SIZE,NEXT) do { \ + nvkm_kmap((PT)->memory); \ + while (PTEN) { \ + u64 _ptes = ((SIZE) - MAP->off) >> MAP->page->shift; \ + u64 _addr = ((BASE) + MAP->off); \ + \ + if (_ptes > PTEN) { \ + MAP->off += PTEN << MAP->page->shift; \ + _ptes = PTEN; \ + } else { \ + MAP->off = 0; \ + NEXT; \ + } \ + \ + VMM_SPAM(VMM, "ITER %08x %08x PTE(s)", PTEI, (u32)_ptes); \ + \ + FILL(VMM, PT, PTEI, _ptes, MAP, _addr); \ + PTEI += _ptes; \ + PTEN -= _ptes; \ + }; \ + nvkm_done((PT)->memory); \ +} while(0) + +#define VMM_MAP_ITER_MEM(VMM,PT,PTEI,PTEN,MAP,FILL) \ + VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \ + ((u64)MAP->mem->offset << NVKM_RAM_MM_SHIFT), \ + ((u64)MAP->mem->length << NVKM_RAM_MM_SHIFT), \ + (MAP->mem = MAP->mem->next)) +#define VMM_MAP_ITER_DMA(VMM,PT,PTEI,PTEN,MAP,FILL) \ + VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \ + *MAP->dma, PAGE_SIZE, MAP->dma++) +#define VMM_MAP_ITER_SGL(VMM,PT,PTEI,PTEN,MAP,FILL) \ + VMM_MAP_ITER(VMM,PT,PTEI,PTEN,MAP,FILL, \ + sg_dma_address(MAP->sgl), sg_dma_len(MAP->sgl), \ + (MAP->sgl = sg_next(MAP->sgl))) + +#define VMM_FO(m,o,d,c,b) nvkm_fo##b((m)->memory, (o), (d), (c)) +#define VMM_WO(m,o,d,c,b) nvkm_wo##b((m)->memory, (o), (d)) +#define VMM_XO(m,v,o,d,c,b,fn,f,a...) do { \ + const u32 _pteo = (o); u##b _data = (d); \ + VMM_SPAM((v), " %010llx "f, (m)->addr + _pteo, _data, ##a); \ + VMM_##fn((m), (m)->base + _pteo, _data, (c), b); \ +} while(0) + +#define VMM_WO032(m,v,o,d) VMM_XO((m),(v),(o),(d), 1, 32, WO, "%08x") +#define VMM_FO032(m,v,o,d,c) \ + VMM_XO((m),(v),(o),(d),(c), 32, FO, "%08x %08x", (c)) + +#define VMM_WO064(m,v,o,d) VMM_XO((m),(v),(o),(d), 1, 64, WO, "%016llx") +#define VMM_FO064(m,v,o,d,c) \ + VMM_XO((m),(v),(o),(d),(c), 64, FO, "%016llx %08x", (c)) + +#define VMM_XO128(m,v,o,lo,hi,c,f,a...) do { \ + u32 _pteo = (o), _ptes = (c); \ + const u64 _addr = (m)->addr + _pteo; \ + VMM_SPAM((v), " %010llx %016llx%016llx"f, _addr, (hi), (lo), ##a); \ + while (_ptes--) { \ + nvkm_wo64((m)->memory, (m)->base + _pteo + 0, (lo)); \ + nvkm_wo64((m)->memory, (m)->base + _pteo + 8, (hi)); \ + _pteo += 0x10; \ + } \ +} while(0) + +#define VMM_WO128(m,v,o,lo,hi) VMM_XO128((m),(v),(o),(lo),(hi), 1, "") +#define VMM_FO128(m,v,o,lo,hi,c) do { \ + nvkm_kmap((m)->memory); \ + VMM_XO128((m),(v),(o),(lo),(hi),(c), " %08x", (c)); \ + nvkm_done((m)->memory); \ +} while(0) +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c new file mode 100644 index 000000000000..faf5a7e9265e --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c @@ -0,0 +1,403 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/fb.h> +#include <subdev/ltc.h> +#include <subdev/timer.h> + +#include <nvif/if900d.h> +#include <nvif/unpack.h> + +static inline void +gf100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 base = (addr >> 8) | map->type; + u64 data = base; + + if (map->ctag && !(map->next & (1ULL << 44))) { + while (ptes--) { + data = base | ((map->ctag >> 1) << 44); + if (!(map->ctag++ & 1)) + data |= BIT_ULL(60); + + VMM_WO064(pt, vmm, ptei++ * 8, data); + base += map->next; + } + } else { + map->type += ptes * map->ctag; + + while (ptes--) { + VMM_WO064(pt, vmm, ptei++ * 8, data); + data += map->next; + } + } +} + +void +gf100_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, gf100_vmm_pgt_pte); +} + +void +gf100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + if (map->page->shift == PAGE_SHIFT) { + VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); + nvkm_kmap(pt->memory); + while (ptes--) { + const u64 data = (*map->dma++ >> 8) | map->type; + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->type += map->ctag; + } + nvkm_done(pt->memory); + return; + } + + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, gf100_vmm_pgt_pte); +} + +void +gf100_vmm_pgt_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, gf100_vmm_pgt_pte); +} + +void +gf100_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO064(pt, vmm, ptei * 8, 0ULL, ptes); +} + +const struct nvkm_vmm_desc_func +gf100_vmm_pgt = { + .unmap = gf100_vmm_pgt_unmap, + .mem = gf100_vmm_pgt_mem, + .dma = gf100_vmm_pgt_dma, + .sgl = gf100_vmm_pgt_sgl, +}; + +void +gf100_vmm_pgd_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + struct nvkm_mmu_pt *pd = pgd->pt[0]; + struct nvkm_mmu_pt *pt; + u64 data = 0; + + if ((pt = pgt->pt[0])) { + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: data |= 1ULL << 0; break; + case NVKM_MEM_TARGET_HOST: data |= 2ULL << 0; + data |= BIT_ULL(35); /* VOL */ + break; + case NVKM_MEM_TARGET_NCOH: data |= 3ULL << 0; break; + default: + WARN_ON(1); + return; + } + data |= pt->addr >> 8; + } + + if ((pt = pgt->pt[1])) { + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: data |= 1ULL << 32; break; + case NVKM_MEM_TARGET_HOST: data |= 2ULL << 32; + data |= BIT_ULL(34); /* VOL */ + break; + case NVKM_MEM_TARGET_NCOH: data |= 3ULL << 32; break; + default: + WARN_ON(1); + return; + } + data |= pt->addr << 24; + } + + nvkm_kmap(pd->memory); + VMM_WO064(pd, vmm, pdei * 8, data); + nvkm_done(pd->memory); +} + +const struct nvkm_vmm_desc_func +gf100_vmm_pgd = { + .unmap = gf100_vmm_pgt_unmap, + .pde = gf100_vmm_pgd_pde, +}; + +static const struct nvkm_vmm_desc +gf100_vmm_desc_17_12[] = { + { SPT, 15, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 13, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_desc +gf100_vmm_desc_17_17[] = { + { LPT, 10, 8, 
0x1000, &gf100_vmm_pgt }, + { PGD, 13, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_desc +gf100_vmm_desc_16_12[] = { + { SPT, 14, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 14, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_desc +gf100_vmm_desc_16_16[] = { + { LPT, 10, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 14, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +void +gf100_vmm_flush_(struct nvkm_vmm *vmm, int depth) +{ + struct nvkm_subdev *subdev = &vmm->mmu->subdev; + struct nvkm_device *device = subdev->device; + u32 type = depth << 24; + + type = 0x00000001; /* PAGE_ALL */ + if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) + type |= 0x00000004; /* HUB_ONLY */ + + mutex_lock(&subdev->mutex); + /* Looks like maybe a "free flush slots" counter, the + * faster you write to 0x100cbc the more it decreases. + */ + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x100c80) & 0x00ff0000) + break; + ); + + nvkm_wr32(device, 0x100cb8, vmm->pd->pt[0]->addr >> 8); + nvkm_wr32(device, 0x100cbc, 0x80000000 | type); + + /* Wait for flush to be queued? */ + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x100c80) & 0x00008000) + break; + ); + mutex_unlock(&subdev->mutex); +} + +void +gf100_vmm_flush(struct nvkm_vmm *vmm, int depth) +{ + gf100_vmm_flush_(vmm, 0); +} + +int +gf100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + const enum nvkm_memory_target target = nvkm_memory_target(map->memory); + const struct nvkm_vmm_page *page = map->page; + const bool gm20x = page->desc->func->sparse != NULL; + union { + struct gf100_vmm_map_vn vn; + struct gf100_vmm_map_v0 v0; + } *args = argv; + struct nvkm_device *device = vmm->mmu->subdev.device; + struct nvkm_memory *memory = map->memory; + u8 kind, priv, ro, vol; + int kindn, aper, ret = -ENOSYS; + const u8 *kindm; + + map->next = (1 << page->shift) >> 8; + map->type = map->ctag = 0; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + vol = !!args->v0.vol; + ro = !!args->v0.ro; + priv = !!args->v0.priv; + kind = args->v0.kind; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + vol = target == NVKM_MEM_TARGET_HOST; + ro = 0; + priv = 0; + kind = 0x00; + } else { + VMM_DEBUG(vmm, "args"); + return ret; + } + + aper = vmm->func->aper(target); + if (WARN_ON(aper < 0)) + return aper; + + kindm = vmm->mmu->func->kind(vmm->mmu, &kindn); + if (kind >= kindn || kindm[kind] == 0xff) { + VMM_DEBUG(vmm, "kind %02x", kind); + return -EINVAL; + } + + if (kindm[kind] != kind) { + u32 comp = (page->shift == 16 && !gm20x) ?
16 : 17; + u32 tags = ALIGN(nvkm_memory_size(memory), 1 << 17) >> comp; + if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { + VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); + return -EINVAL; + } + + ret = nvkm_memory_tags_get(memory, device, tags, + nvkm_ltc_tags_clear, + &map->tags); + if (ret) { + VMM_DEBUG(vmm, "comp %d", ret); + return ret; + } + + if (map->tags->mn) { + u64 tags = map->tags->mn->offset + (map->offset >> 17); + if (page->shift == 17 || !gm20x) { + map->type |= tags << 44; + map->ctag |= 1ULL << 44; + map->next |= 1ULL << 44; + } else { + map->ctag |= tags << 1 | 1; + } + } else { + kind = kindm[kind]; + } + } + + map->type |= BIT(0); + map->type |= (u64)priv << 1; + map->type |= (u64) ro << 2; + map->type |= (u64) vol << 32; + map->type |= (u64)aper << 33; + map->type |= (u64)kind << 36; + return 0; +} + +int +gf100_vmm_aper(enum nvkm_memory_target target) +{ + switch (target) { + case NVKM_MEM_TARGET_VRAM: return 0; + case NVKM_MEM_TARGET_HOST: return 2; + case NVKM_MEM_TARGET_NCOH: return 3; + default: + return -EINVAL; + } +} + +void +gf100_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + nvkm_fo64(inst, 0x0200, 0x00000000, 2); +} + +int +gf100_vmm_join_(struct nvkm_vmm *vmm, struct nvkm_memory *inst, u64 base) +{ + struct nvkm_mmu_pt *pd = vmm->pd->pt[0]; + + switch (nvkm_memory_target(pd->memory)) { + case NVKM_MEM_TARGET_VRAM: base |= 0ULL << 0; break; + case NVKM_MEM_TARGET_HOST: base |= 2ULL << 0; + base |= BIT_ULL(2) /* VOL. */; + break; + case NVKM_MEM_TARGET_NCOH: base |= 3ULL << 0; break; + default: + WARN_ON(1); + return -EINVAL; + } + base |= pd->addr; + + nvkm_kmap(inst); + nvkm_wo64(inst, 0x0200, base); + nvkm_wo64(inst, 0x0208, vmm->limit - 1); + nvkm_done(inst); + return 0; +} + +int +gf100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + return gf100_vmm_join_(vmm, inst, 0); +} + +static const struct nvkm_vmm_func +gf100_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 17, &gf100_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gf100_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gf100_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 16, &gf100_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gf100_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +gf100_vmm_new_(const struct nvkm_vmm_func *func_16, + const struct nvkm_vmm_func *func_17, + struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + switch (mmu->subdev.device->fb->page) { + case 16: return nv04_vmm_new_(func_16, mmu, 0, addr, size, + argv, argc, key, name, pvmm); + case 17: return nv04_vmm_new_(func_17, mmu, 0, addr, size, + argv, argc, key, name, pvmm); + default: + WARN_ON(1); + return -EINVAL; + } +} + +int +gf100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gf100_vmm_16, &gf100_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c new file mode 100644 index 000000000000..0ebb7bccfcd2 --- /dev/null +++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c @@ -0,0 +1,102 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +void +gk104_vmm_lpt_invalid(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + PRIV tells the MMU to ignore corresponding SPTEs. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(1) /* PRIV. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gk104_vmm_lpt = { + .invalid = gk104_vmm_lpt_invalid, + .unmap = gf100_vmm_pgt_unmap, + .mem = gf100_vmm_pgt_mem, +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_17_12[] = { + { SPT, 15, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 13, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_17_17[] = { + { LPT, 10, 8, 0x1000, &gk104_vmm_lpt }, + { PGD, 13, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_16_12[] = { + { SPT, 14, 8, 0x1000, &gf100_vmm_pgt }, + { PGD, 14, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gk104_vmm_desc_16_16[] = { + { LPT, 10, 8, 0x1000, &gk104_vmm_lpt }, + { PGD, 14, 8, 0x1000, &gf100_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_func +gk104_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gk104_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, + { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +gk104_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gk104_vmm_16, &gk104_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c new file mode 100644 index 000000000000..8086994a0446 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c @@ -0,0 +1,71 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <core/memory.h> + +int +gk20a_vmm_aper(enum nvkm_memory_target target) +{ + switch (target) { + case NVKM_MEM_TARGET_NCOH: return 0; + default: + return -EINVAL; + } +} + +static const struct nvkm_vmm_func +gk20a_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xxHC }, + { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xxHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gk20a_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xxHC }, + { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xxHx }, + {} + } +}; + +int +gk20a_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gk20a_vmm_16, &gk20a_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c new file mode 100644 index 000000000000..a1676a4644fe --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c @@ -0,0 +1,185 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <nvif/ifb00d.h> +#include <nvif/unpack.h> + +static void +gm200_vmm_pgt_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + VOL tells the MMU to treat the PTE as sparse. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(32) /* VOL. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gm200_vmm_spt = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gm200_vmm_pgt_sparse, + .mem = gf100_vmm_pgt_mem, + .dma = gf100_vmm_pgt_dma, + .sgl = gf100_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc_func +gm200_vmm_lpt = { + .invalid = gk104_vmm_lpt_invalid, + .unmap = gf100_vmm_pgt_unmap, + .sparse = gm200_vmm_pgt_sparse, + .mem = gf100_vmm_pgt_mem, +}; + +static void +gm200_vmm_pgd_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 pdei, u32 pdes) +{ + /* VALID_FALSE + VOL_BIG tells the MMU to treat the PDE as sparse. */ + VMM_FO064(pt, vmm, pdei * 8, BIT_ULL(35) /* VOL_BIG. */, pdes); +} + +static const struct nvkm_vmm_desc_func +gm200_vmm_pgd = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gm200_vmm_pgd_sparse, + .pde = gf100_vmm_pgd_pde, +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_17_12[] = { + { SPT, 15, 8, 0x1000, &gm200_vmm_spt }, + { PGD, 13, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_17_17[] = { + { LPT, 10, 8, 0x1000, &gm200_vmm_lpt }, + { PGD, 13, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_16_12[] = { + { SPT, 14, 8, 0x1000, &gm200_vmm_spt }, + { PGD, 14, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +const struct nvkm_vmm_desc +gm200_vmm_desc_16_16[] = { + { LPT, 10, 8, 0x1000, &gm200_vmm_lpt }, + { PGD, 14, 8, 0x1000, &gm200_vmm_pgd }, + {} +}; + +int +gm200_vmm_join_(struct nvkm_vmm *vmm, struct nvkm_memory *inst, u64 base) +{ + if (vmm->func->page[1].shift == 16) + base |= BIT_ULL(11); + return gf100_vmm_join_(vmm, inst, base); +} + +int +gm200_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + return gm200_vmm_join_(vmm, inst, 0); +} + +static const struct nvkm_vmm_func +gm200_vmm_17 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, + { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SVxC }, + { 12, &gm200_vmm_desc_17_12[0], NVKM_VMM_PAGE_SVHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gm200_vmm_16 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, + { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SVxC }, + { 12, &gm200_vmm_desc_16_12[0], NVKM_VMM_PAGE_SVHx }, + {} + } +}; + +int +gm200_vmm_new_(const struct nvkm_vmm_func *func_16, + const struct nvkm_vmm_func *func_17, + struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + const struct nvkm_vmm_func *func; + union { + struct gm200_vmm_vn vn; + struct gm200_vmm_v0 v0; + } *args = argv; + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + switch 
(args->v0.bigpage) { + case 16: func = func_16; break; + case 17: func = func_17; break; + default: + return -EINVAL; + } + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + func = func_17; + } else + return ret; + + return nvkm_vmm_new_(func, mmu, 0, addr, size, key, name, pvmm); +} + +int +gm200_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gm200_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} + +int +gm200_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c new file mode 100644 index 000000000000..64d4b6cff8dd --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c @@ -0,0 +1,70 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "vmm.h" + +static const struct nvkm_vmm_func +gm20b_vmm_17 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, + { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SxHC }, + { 12, &gm200_vmm_desc_17_12[0], NVKM_VMM_PAGE_SxHx }, + {} + } +}; + +static const struct nvkm_vmm_func +gm20b_vmm_16 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, + .valid = gf100_vmm_valid, + .flush = gf100_vmm_flush, + .page = { + { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, + { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SxHC }, + { 12, &gm200_vmm_desc_16_12[0], NVKM_VMM_PAGE_SxHx }, + {} + } +}; + +int +gm20b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return gm200_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} + +int +gm20b_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) +{ + return gf100_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + size, argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c new file mode 100644 index 000000000000..059fafe0e771 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -0,0 +1,347 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "vmm.h" + +#include <subdev/fb.h> +#include <subdev/ltc.h> + +#include <nvif/ifc00d.h> +#include <nvif/unpack.h> + +static inline void +gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 data = (addr >> 4) | map->type; + + map->type += ptes * map->ctag; + + while (ptes--) { + VMM_WO064(pt, vmm, ptei++ * 8, data); + data += map->next; + } +} + +static void +gp100_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, gp100_vmm_pgt_pte); +} + +static void +gp100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + if (map->page->shift == PAGE_SHIFT) { + VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); + nvkm_kmap(pt->memory); + while (ptes--) { + const u64 data = (*map->dma++ >> 4) | map->type; + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->type += map->ctag; + } + nvkm_done(pt->memory); + return; + } + + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, gp100_vmm_pgt_pte); +} + +static void +gp100_vmm_pgt_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, gp100_vmm_pgt_pte); +} + +static void +gp100_vmm_pgt_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + VOL tells the MMU to treat the PTE as sparse. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(3) /* VOL. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_spt = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gp100_vmm_pgt_sparse, + .mem = gp100_vmm_pgt_mem, + .dma = gp100_vmm_pgt_dma, + .sgl = gp100_vmm_pgt_sgl, +}; + +static void +gp100_vmm_lpt_invalid(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + /* VALID_FALSE + PRIV tells the MMU to ignore corresponding SPTEs. */ + VMM_FO064(pt, vmm, ptei * 8, BIT_ULL(5) /* PRIV. */, ptes); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_lpt = { + .invalid = gp100_vmm_lpt_invalid, + .unmap = gf100_vmm_pgt_unmap, + .sparse = gp100_vmm_pgt_sparse, + .mem = gp100_vmm_pgt_mem, +}; + +static inline void +gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 data = (addr >> 4) | map->type; + + map->type += ptes * map->ctag; + + while (ptes--) { + VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL); + data += map->next; + } +} + +static void +gp100_vmm_pd0_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, gp100_vmm_pd0_pte); +} + +static inline bool +gp100_vmm_pde(struct nvkm_mmu_pt *pt, u64 *data) +{ + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: *data |= 1ULL << 1; break; + case NVKM_MEM_TARGET_HOST: *data |= 2ULL << 1; + *data |= BIT_ULL(3); /* VOL. 
*/ + break; + case NVKM_MEM_TARGET_NCOH: *data |= 3ULL << 1; break; + default: + WARN_ON(1); + return false; + } + *data |= pt->addr >> 4; + return true; +} + +static void +gp100_vmm_pd0_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + struct nvkm_mmu_pt *pd = pgd->pt[0]; + u64 data[2] = {}; + + if (pgt->pt[0] && !gp100_vmm_pde(pgt->pt[0], &data[0])) + return; + if (pgt->pt[1] && !gp100_vmm_pde(pgt->pt[1], &data[1])) + return; + + nvkm_kmap(pd->memory); + VMM_WO128(pd, vmm, pdei * 0x10, data[0], data[1]); + nvkm_done(pd->memory); +} + +static void +gp100_vmm_pd0_sparse(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 pdei, u32 pdes) +{ + /* VALID_FALSE + VOL_BIG tells the MMU to treat the PDE as sparse. */ + VMM_FO128(pt, vmm, pdei * 0x10, BIT_ULL(3) /* VOL_BIG. */, 0ULL, pdes); +} + +static void +gp100_vmm_pd0_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 pdei, u32 pdes) +{ + VMM_FO128(pt, vmm, pdei * 0x10, 0ULL, 0ULL, pdes); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_pd0 = { + .unmap = gp100_vmm_pd0_unmap, + .sparse = gp100_vmm_pd0_sparse, + .pde = gp100_vmm_pd0_pde, + .mem = gp100_vmm_pd0_mem, +}; + +static void +gp100_vmm_pd1_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_pt *pgt = pgd->pde[pdei]; + struct nvkm_mmu_pt *pd = pgd->pt[0]; + u64 data = 0; + + if (!gp100_vmm_pde(pgt->pt[0], &data)) + return; + + nvkm_kmap(pd->memory); + VMM_WO064(pd, vmm, pdei * 8, data); + nvkm_done(pd->memory); +} + +static const struct nvkm_vmm_desc_func +gp100_vmm_desc_pd1 = { + .unmap = gf100_vmm_pgt_unmap, + .sparse = gp100_vmm_pgt_sparse, + .pde = gp100_vmm_pd1_pde, +}; + +const struct nvkm_vmm_desc +gp100_vmm_desc_16[] = { + { LPT, 5, 8, 0x0100, &gp100_vmm_desc_lpt }, + { PGD, 8, 16, 0x1000, &gp100_vmm_desc_pd0 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 2, 8, 0x1000, &gp100_vmm_desc_pd1 }, + {} +}; + +const struct nvkm_vmm_desc +gp100_vmm_desc_12[] = { + { SPT, 9, 8, 0x1000, &gp100_vmm_desc_spt }, + { PGD, 8, 16, 0x1000, &gp100_vmm_desc_pd0 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 9, 8, 0x1000, &gp100_vmm_desc_pd1 }, + { PGD, 2, 8, 0x1000, &gp100_vmm_desc_pd1 }, + {} +}; + +int +gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + const enum nvkm_memory_target target = nvkm_memory_target(map->memory); + const struct nvkm_vmm_page *page = map->page; + union { + struct gp100_vmm_map_vn vn; + struct gp100_vmm_map_v0 v0; + } *args = argv; + struct nvkm_device *device = vmm->mmu->subdev.device; + struct nvkm_memory *memory = map->memory; + u8 kind, priv, ro, vol; + int kindn, aper, ret = -ENOSYS; + const u8 *kindm; + + map->next = (1ULL << page->shift) >> 4; + map->type = 0; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + vol = !!args->v0.vol; + ro = !!args->v0.ro; + priv = !!args->v0.priv; + kind = args->v0.kind; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + vol = target == NVKM_MEM_TARGET_HOST; + ro = 0; + priv = 0; + kind = 0x00; + } else { + VMM_DEBUG(vmm, "args"); + return ret; + } + + aper = vmm->func->aper(target); + if (WARN_ON(aper < 0)) + return aper; + + kindm = vmm->mmu->func->kind(vmm->mmu, &kindn); + if (kind >= kindn || kindm[kind] == 0xff) { + VMM_DEBUG(vmm, "kind %02x", kind); + return -EINVAL; + } + + if (kindm[kind] != kind) { + u64 tags = nvkm_memory_size(memory) >> 16; + if (aper != 0 || 
!(page->type & NVKM_VMM_PAGE_COMP)) { + VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); + return -EINVAL; + } + + ret = nvkm_memory_tags_get(memory, device, tags, + nvkm_ltc_tags_clear, + &map->tags); + if (ret) { + VMM_DEBUG(vmm, "comp %d", ret); + return ret; + } + + if (map->tags->mn) { + tags = map->tags->mn->offset + (map->offset >> 16); + map->ctag |= ((1ULL << page->shift) >> 16) << 36; + map->type |= tags << 36; + map->next |= map->ctag; + } else { + kind = kindm[kind]; + } + } + + map->type |= BIT(0); + map->type |= (u64)aper << 1; + map->type |= (u64) vol << 3; + map->type |= (u64)priv << 5; + map->type |= (u64) ro << 6; + map->type |= (u64)kind << 56; + return 0; +} + +void +gp100_vmm_flush(struct nvkm_vmm *vmm, int depth) +{ + gf100_vmm_flush_(vmm, 5 /* CACHE_LEVEL_UP_TO_PDE3 */ - depth); +} + +int +gp100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + const u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11); /* 64KiB */ + return gf100_vmm_join_(vmm, inst, base); +} + +static const struct nvkm_vmm_func +gp100_vmm = { + .join = gp100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gp100_vmm_valid, + .flush = gp100_vmm_flush, + .page = { + { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, + { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, + { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC }, + { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx }, + {} + } +}; + +int +gp100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&gp100_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c new file mode 100644 index 000000000000..3dcc6bddb32f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -0,0 +1,49 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "vmm.h" + +static const struct nvkm_vmm_func +gp10b_vmm = { + .join = gp100_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, + .valid = gp100_vmm_valid, + .flush = gp100_vmm_flush, + .page = { + { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, + { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, + { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHC }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHC }, + { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SxHx }, + {} + } +}; + +int +gp10b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&gp10b_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c new file mode 100644 index 000000000000..0cab1ffc9f64 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c @@ -0,0 +1,140 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <nvif/if000d.h> +#include <nvif/unpack.h> + +static inline void +nv04_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u32 data = addr | 0x00000003; /* PRESENT, RW. 
*/ + while (ptes--) { + VMM_WO032(pt, vmm, 8 + ptei++ * 4, data); + data += 0x00001000; + } +} + +static void +nv04_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv04_vmm_pgt_pte); +} + +static void +nv04_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ +#if PAGE_SHIFT == 12 + nvkm_kmap(pt->memory); + while (ptes--) + VMM_WO032(pt, vmm, 8 + (ptei++ * 4), *map->dma++ | 0x00000003); + nvkm_done(pt->memory); +#else + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv04_vmm_pgt_pte); +#endif +} + +static void +nv04_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO032(pt, vmm, 8 + (ptei * 4), 0, ptes); +} + +static const struct nvkm_vmm_desc_func +nv04_vmm_desc_pgt = { + .unmap = nv04_vmm_pgt_unmap, + .dma = nv04_vmm_pgt_dma, + .sgl = nv04_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc +nv04_vmm_desc_12[] = { + { PGT, 15, 4, 0x1000, &nv04_vmm_desc_pgt }, + {} +}; + +int +nv04_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + union { + struct nv04_vmm_map_vn vn; + } *args = argv; + int ret = -ENOSYS; + if ((ret = nvif_unvers(ret, &argv, &argc, args->vn))) + VMM_DEBUG(vmm, "args"); + return ret; +} + +static const struct nvkm_vmm_func +nv04_vmm = { + .valid = nv04_vmm_valid, + .page = { + { 12, &nv04_vmm_desc_12[0], NVKM_VMM_PAGE_HOST }, + {} + } +}; + +int +nv04_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, + u32 pd_header, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + union { + struct nv04_vmm_vn vn; + } *args = argv; + int ret; + + ret = nvkm_vmm_new_(func, mmu, pd_header, addr, size, key, name, pvmm); + if (ret) + return ret; + + return nvif_unvers(-ENOSYS, &argv, &argc, args->vn); +} + +int +nv04_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + struct nvkm_memory *mem; + struct nvkm_vmm *vmm; + int ret; + + ret = nv04_vmm_new_(&nv04_vmm, mmu, 8, addr, size, + argv, argc, key, name, &vmm); + *pvmm = vmm; + if (ret) + return ret; + + mem = vmm->pd->pt[0]->memory; + nvkm_kmap(mem); + nvkm_wo32(mem, 0x00000, 0x0002103d); /* PCI, RW, PT, !LN */ + nvkm_wo32(mem, 0x00004, vmm->limit - 1); + nvkm_done(mem); + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c new file mode 100644 index 000000000000..b595f130e573 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c @@ -0,0 +1,113 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/timer.h> + +static void +nv41_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u32 data = (addr >> 7) | 0x00000001; /* VALID. */ + while (ptes--) { + VMM_WO032(pt, vmm, ptei++ * 4, data); + data += 0x00000020; + } +} + +static void +nv41_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv41_vmm_pgt_pte); +} + +static void +nv41_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ +#if PAGE_SHIFT == 12 + nvkm_kmap(pt->memory); + while (ptes--) { + const u32 data = (*map->dma++ >> 7) | 0x00000001; + VMM_WO032(pt, vmm, ptei++ * 4, data); + } + nvkm_done(pt->memory); +#else + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv41_vmm_pgt_pte); +#endif +} + +static void +nv41_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO032(pt, vmm, ptei * 4, 0, ptes); +} + +static const struct nvkm_vmm_desc_func +nv41_vmm_desc_pgt = { + .unmap = nv41_vmm_pgt_unmap, + .dma = nv41_vmm_pgt_dma, + .sgl = nv41_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc +nv41_vmm_desc_12[] = { + { PGT, 17, 4, 0x1000, &nv41_vmm_desc_pgt }, + {} +}; + +static void +nv41_vmm_flush(struct nvkm_vmm *vmm, int level) +{ + struct nvkm_subdev *subdev = &vmm->mmu->subdev; + struct nvkm_device *device = subdev->device; + + mutex_lock(&subdev->mutex); + nvkm_wr32(device, 0x100810, 0x00000022); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x100810) & 0x00000020) + break; + ); + nvkm_wr32(device, 0x100810, 0x00000000); + mutex_unlock(&subdev->mutex); +} + +static const struct nvkm_vmm_func +nv41_vmm = { + .valid = nv04_vmm_valid, + .flush = nv41_vmm_flush, + .page = { + { 12, &nv41_vmm_desc_12[0], NVKM_VMM_PAGE_HOST }, + {} + } +}; + +int +nv41_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&nv41_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c new file mode 100644 index 000000000000..b834e4352334 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c @@ -0,0 +1,230 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/timer.h> + +static void +nv44_vmm_pgt_fill(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + dma_addr_t *list, u32 ptei, u32 ptes) +{ + u32 pteo = (ptei << 2) & ~0x0000000f; + u32 tmp[4]; + + tmp[0] = nvkm_ro32(pt->memory, pteo + 0x0); + tmp[1] = nvkm_ro32(pt->memory, pteo + 0x4); + tmp[2] = nvkm_ro32(pt->memory, pteo + 0x8); + tmp[3] = nvkm_ro32(pt->memory, pteo + 0xc); + + while (ptes--) { + u32 addr = (list ? *list++ : vmm->null) >> 12; + switch (ptei++ & 0x3) { + case 0: + tmp[0] &= ~0x07ffffff; + tmp[0] |= addr; + break; + case 1: + tmp[0] &= ~0xf8000000; + tmp[0] |= addr << 27; + tmp[1] &= ~0x003fffff; + tmp[1] |= addr >> 5; + break; + case 2: + tmp[1] &= ~0xffc00000; + tmp[1] |= addr << 22; + tmp[2] &= ~0x0001ffff; + tmp[2] |= addr >> 10; + break; + case 3: + tmp[2] &= ~0xfffe0000; + tmp[2] |= addr << 17; + tmp[3] &= ~0x00000fff; + tmp[3] |= addr >> 15; + break; + } + } + + VMM_WO032(pt, vmm, pteo + 0x0, tmp[0]); + VMM_WO032(pt, vmm, pteo + 0x4, tmp[1]); + VMM_WO032(pt, vmm, pteo + 0x8, tmp[2]); + VMM_WO032(pt, vmm, pteo + 0xc, tmp[3] | 0x40000000); +} + +static void +nv44_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + dma_addr_t tmp[4], i; + + if (ptei & 3) { + const u32 pten = min(ptes, 4 - (ptei & 3)); + for (i = 0; i < pten; i++, addr += 0x1000) + tmp[i] = addr; + nv44_vmm_pgt_fill(vmm, pt, tmp, ptei, pten); + ptei += pten; + ptes -= pten; + } + + while (ptes >= 4) { + for (i = 0; i < 4; i++, addr += 0x1000) + tmp[i] = addr >> 12; + VMM_WO032(pt, vmm, ptei++ * 4, tmp[0] >> 0 | tmp[1] << 27); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[1] >> 5 | tmp[2] << 22); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[2] >> 10 | tmp[3] << 17); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[3] >> 15 | 0x40000000); + ptes -= 4; + } + + if (ptes) { + for (i = 0; i < ptes; i++, addr += 0x1000) + tmp[i] = addr; + nv44_vmm_pgt_fill(vmm, pt, tmp, ptei, ptes); + } +} + +static void +nv44_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv44_vmm_pgt_pte); +} + +static void +nv44_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ +#if PAGE_SHIFT == 12 + nvkm_kmap(pt->memory); + if (ptei & 3) { + const u32 pten = min(ptes, 4 - (ptei & 3)); + nv44_vmm_pgt_fill(vmm, pt, map->dma, 
ptei, pten); + ptei += pten; + ptes -= pten; + map->dma += pten; + } + + while (ptes >= 4) { + u32 tmp[4], i; + for (i = 0; i < 4; i++) + tmp[i] = *map->dma++ >> 12; + VMM_WO032(pt, vmm, ptei++ * 4, tmp[0] >> 0 | tmp[1] << 27); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[1] >> 5 | tmp[2] << 22); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[2] >> 10 | tmp[3] << 17); + VMM_WO032(pt, vmm, ptei++ * 4, tmp[3] >> 15 | 0x40000000); + ptes -= 4; + } + + if (ptes) { + nv44_vmm_pgt_fill(vmm, pt, map->dma, ptei, ptes); + map->dma += ptes; + } + nvkm_done(pt->memory); +#else + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv44_vmm_pgt_pte); +#endif +} + +static void +nv44_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + nvkm_kmap(pt->memory); + if (ptei & 3) { + const u32 pten = min(ptes, 4 - (ptei & 3)); + nv44_vmm_pgt_fill(vmm, pt, NULL, ptei, pten); + ptei += pten; + ptes -= pten; + } + + while (ptes > 4) { + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + VMM_WO032(pt, vmm, ptei++ * 4, 0x00000000); + ptes -= 4; + } + + if (ptes) + nv44_vmm_pgt_fill(vmm, pt, NULL, ptei, ptes); + nvkm_done(pt->memory); +} + +static const struct nvkm_vmm_desc_func +nv44_vmm_desc_pgt = { + .unmap = nv44_vmm_pgt_unmap, + .dma = nv44_vmm_pgt_dma, + .sgl = nv44_vmm_pgt_sgl, +}; + +static const struct nvkm_vmm_desc +nv44_vmm_desc_12[] = { + { PGT, 17, 4, 0x80000, &nv44_vmm_desc_pgt }, + {} +}; + +static void +nv44_vmm_flush(struct nvkm_vmm *vmm, int level) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + nvkm_wr32(device, 0x100814, vmm->limit - 4096); + nvkm_wr32(device, 0x100808, 0x000000020); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x100808) & 0x00000001) + break; + ); + nvkm_wr32(device, 0x100808, 0x00000000); +} + +static const struct nvkm_vmm_func +nv44_vmm = { + .valid = nv04_vmm_valid, + .flush = nv44_vmm_flush, + .page = { + { 12, &nv44_vmm_desc_12[0], NVKM_VMM_PAGE_HOST }, + {} + } +}; + +int +nv44_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + struct nvkm_subdev *subdev = &mmu->subdev; + struct nvkm_vmm *vmm; + int ret; + + ret = nv04_vmm_new_(&nv44_vmm, mmu, 0, addr, size, + argv, argc, key, name, &vmm); + *pvmm = vmm; + if (ret) + return ret; + + vmm->nullp = dma_alloc_coherent(subdev->device->dev, 16 * 1024, + &vmm->null, GFP_KERNEL); + if (!vmm->nullp) { + nvkm_warn(subdev, "unable to allocate dummy pages\n"); + vmm->null = 0; + } + + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c new file mode 100644 index 000000000000..863a2edd9861 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c @@ -0,0 +1,385 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
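The nv44 path above is the only one of these backends with a non-trivial PTE layout, so a worked model may help: four consecutive 27-bit page frame numbers (physical address >> 12) are packed back-to-back into one 16-byte group, bit 30 of the last dword marks the group as populated, and unmapped slots are pointed at the dummy page allocated with dma_alloc_coherent() in nv44_vmm_new(). The sketch below mirrors the shift/mask arms of nv44_vmm_pgt_fill(); it is an illustration, not driver code.

    /* Standalone model of the nv44 packed-PTE group (not driver code). */
    #include <stdint.h>
    #include <stdio.h>

    /* pfn[n] is the 27-bit page frame number of entry n (phys >> 12). */
    static void nv44_pack_group(const uint32_t pfn[4], uint32_t out[4])
    {
        out[0] = (pfn[0] & 0x07ffffff) | (pfn[1] << 27);  /* e0[26:0],  e1[4:0]   */
        out[1] = (pfn[1] >>  5)        | (pfn[2] << 22);  /* e1[26:5],  e2[9:0]   */
        out[2] = (pfn[2] >> 10)        | (pfn[3] << 17);  /* e2[26:10], e3[14:0]  */
        out[3] = (pfn[3] >> 15)        | 0x40000000;      /* e3[26:15], group set */
    }

    int main(void)
    {
        /* Four example 4 KiB pages; an unmapped slot would instead carry
         * the PFN of the vmm->null dummy page. */
        const uint32_t pfn[4] = { 0x0012345, 0x0012346, 0x000abcd, 0x7ffffff };
        uint32_t pte[4];
        int i;

        nv44_pack_group(pfn, pte);
        for (i = 0; i < 4; i++)
            printf("PTE[%d] = %#010x\n", i, pte[i]);
        return 0;
    }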
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/fb.h> +#include <subdev/timer.h> +#include <engine/gr.h> + +#include <nvif/if500d.h> +#include <nvif/unpack.h> + +static inline void +nv50_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) +{ + u64 next = addr | map->type, data; + u32 pten; + int log2blk; + + map->type += ptes * map->ctag; + + while (ptes) { + for (log2blk = 7; log2blk >= 0; log2blk--) { + pten = 1 << log2blk; + if (ptes >= pten && IS_ALIGNED(ptei, pten)) + break; + } + + data = next | (log2blk << 7); + next += pten * map->next; + ptes -= pten; + + while (pten--) + VMM_WO064(pt, vmm, ptei++ * 8, data); + } +} + +static void +nv50_vmm_pgt_sgl(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_SGL(vmm, pt, ptei, ptes, map, nv50_vmm_pgt_pte); +} + +static void +nv50_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + if (map->page->shift == PAGE_SHIFT) { + VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); + nvkm_kmap(pt->memory); + while (ptes--) { + const u64 data = *map->dma++ | map->type; + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->type += map->ctag; + } + nvkm_done(pt->memory); + return; + } + + VMM_MAP_ITER_DMA(vmm, pt, ptei, ptes, map, nv50_vmm_pgt_pte); +} + +static void +nv50_vmm_pgt_mem(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + VMM_MAP_ITER_MEM(vmm, pt, ptei, ptes, map, nv50_vmm_pgt_pte); +} + +static void +nv50_vmm_pgt_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + VMM_FO064(pt, vmm, ptei * 8, 0ULL, ptes); +} + +static const struct nvkm_vmm_desc_func +nv50_vmm_pgt = { + .unmap = nv50_vmm_pgt_unmap, + .mem = nv50_vmm_pgt_mem, + .dma = nv50_vmm_pgt_dma, + .sgl = nv50_vmm_pgt_sgl, +}; + +static bool +nv50_vmm_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgt, u64 *pdata) +{ + struct nvkm_mmu_pt *pt; + u64 data = 0xdeadcafe00000000ULL; + if (pgt && (pt = pgt->pt[0])) { + switch (pgt->page) { + case 16: data = 0x00000001; break; + case 12: data = 0x00000003; + switch (nvkm_memory_size(pt->memory)) { + case 0x100000: data |= 0x00000000; break; + case 0x040000: data |= 0x00000020; break; + case 0x020000: data |= 0x00000040; break; + case 0x010000: data |= 0x00000060; break; + default: + WARN_ON(1); + return false; + } + break; + default: + WARN_ON(1); + return false; + } + + switch (nvkm_memory_target(pt->memory)) { + case NVKM_MEM_TARGET_VRAM: data |= 0x00000000; break; + case NVKM_MEM_TARGET_HOST: data |= 0x00000008; break; + case NVKM_MEM_TARGET_NCOH: data |= 0x0000000c; break; + default: + WARN_ON(1); + return false; + } + + data |= pt->addr; + } + *pdata = data; + return true; +} + +static void +nv50_vmm_pgd_pde(struct nvkm_vmm *vmm, struct nvkm_vmm_pt *pgd, u32 pdei) +{ + struct nvkm_vmm_join *join; + u32 pdeo = vmm->mmu->func->vmm.pd_offset + (pdei * 8); + u64 data; + + if (!nv50_vmm_pde(vmm, pgd->pde[pdei], 
&data)) + return; + + list_for_each_entry(join, &vmm->join, head) { + nvkm_kmap(join->inst); + nvkm_wo64(join->inst, pdeo, data); + nvkm_done(join->inst); + } +} + +static const struct nvkm_vmm_desc_func +nv50_vmm_pgd = { + .pde = nv50_vmm_pgd_pde, +}; + +static const struct nvkm_vmm_desc +nv50_vmm_desc_12[] = { + { PGT, 17, 8, 0x1000, &nv50_vmm_pgt }, + { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, + {} +}; + +static const struct nvkm_vmm_desc +nv50_vmm_desc_16[] = { + { PGT, 13, 8, 0x1000, &nv50_vmm_pgt }, + { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, + {} +}; + +static void +nv50_vmm_flush(struct nvkm_vmm *vmm, int level) +{ + struct nvkm_subdev *subdev = &vmm->mmu->subdev; + struct nvkm_device *device = subdev->device; + int i, id; + + mutex_lock(&subdev->mutex); + for (i = 0; i < NVKM_SUBDEV_NR; i++) { + if (!atomic_read(&vmm->engref[i])) + continue; + + /* unfortunate hw bug workaround... */ + if (i == NVKM_ENGINE_GR && device->gr) { + int ret = nvkm_gr_tlb_flush(device->gr); + if (ret != -ENODEV) + continue; + } + + switch (i) { + case NVKM_ENGINE_GR : id = 0x00; break; + case NVKM_ENGINE_VP : + case NVKM_ENGINE_MSPDEC: id = 0x01; break; + case NVKM_SUBDEV_BAR : id = 0x06; break; + case NVKM_ENGINE_MSPPP : + case NVKM_ENGINE_MPEG : id = 0x08; break; + case NVKM_ENGINE_BSP : + case NVKM_ENGINE_MSVLD : id = 0x09; break; + case NVKM_ENGINE_CIPHER: + case NVKM_ENGINE_SEC : id = 0x0a; break; + case NVKM_ENGINE_CE0 : id = 0x0d; break; + default: + continue; + } + + nvkm_wr32(device, 0x100c80, (id << 16) | 1); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x100c80) & 0x00000001)) + break; + ) < 0) + nvkm_error(subdev, "%s mmu invalidate timeout\n", + nvkm_subdev_name[i]); + } + mutex_unlock(&subdev->mutex); +} + +static int +nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, + struct nvkm_vmm_map *map) +{ + const struct nvkm_vmm_page *page = map->page; + union { + struct nv50_vmm_map_vn vn; + struct nv50_vmm_map_v0 v0; + } *args = argv; + struct nvkm_device *device = vmm->mmu->subdev.device; + struct nvkm_ram *ram = device->fb->ram; + struct nvkm_memory *memory = map->memory; + u8 aper, kind, comp, priv, ro; + int kindn, ret = -ENOSYS; + const u8 *kindm; + + map->type = map->ctag = 0; + map->next = 1 << page->shift; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + ro = !!args->v0.ro; + priv = !!args->v0.priv; + kind = args->v0.kind & 0x7f; + comp = args->v0.comp & 0x03; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + ro = 0; + priv = 0; + kind = 0x00; + comp = 0; + } else { + VMM_DEBUG(vmm, "args"); + return ret; + } + + switch (nvkm_memory_target(memory)) { + case NVKM_MEM_TARGET_VRAM: + if (ram->stolen) { + map->type |= ram->stolen; + aper = 3; + } else { + aper = 0; + } + break; + case NVKM_MEM_TARGET_HOST: + aper = 2; + break; + case NVKM_MEM_TARGET_NCOH: + aper = 3; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + kindm = vmm->mmu->func->kind(vmm->mmu, &kindn); + if (kind >= kindn || kindm[kind] == 0x7f) { + VMM_DEBUG(vmm, "kind %02x", kind); + return -EINVAL; + } + + if (map->mem && map->mem->type != kindm[kind]) { + VMM_DEBUG(vmm, "kind %02x bankswz: %d %d", kind, + kindm[kind], map->mem->type); + return -EINVAL; + } + + if (comp) { + u32 tags = (nvkm_memory_size(memory) >> 16) * comp; + if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) { + VMM_DEBUG(vmm, "comp %d %02x", aper, page->type); + return -EINVAL; + } + + ret = nvkm_memory_tags_get(memory, device, tags, NULL, + &map->tags); + if (ret) { + VMM_DEBUG(vmm, 
"comp %d", ret); + return ret; + } + + if (map->tags->mn) { + u32 tags = map->tags->mn->offset + (map->offset >> 16); + map->ctag |= (u64)comp << 49; + map->type |= (u64)comp << 47; + map->type |= (u64)tags << 49; + map->next |= map->ctag; + } + } + + map->type |= BIT(0); /* Valid. */ + map->type |= (u64)ro << 3; + map->type |= (u64)aper << 4; + map->type |= (u64)priv << 6; + map->type |= (u64)kind << 40; + return 0; +} + +static void +nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + struct nvkm_vmm_join *join; + + list_for_each_entry(join, &vmm->join, head) { + if (join->inst == inst) { + list_del(&join->head); + kfree(join); + break; + } + } +} + +static int +nv50_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + const u32 pd_offset = vmm->mmu->func->vmm.pd_offset; + struct nvkm_vmm_join *join; + int ret = 0; + u64 data; + u32 pdei; + + if (!(join = kmalloc(sizeof(*join), GFP_KERNEL))) + return -ENOMEM; + join->inst = inst; + list_add_tail(&join->head, &vmm->join); + + nvkm_kmap(join->inst); + for (pdei = vmm->start >> 29; pdei <= (vmm->limit - 1) >> 29; pdei++) { + if (!nv50_vmm_pde(vmm, vmm->pd->pde[pdei], &data)) { + ret = -EINVAL; + break; + } + nvkm_wo64(join->inst, pd_offset + (pdei * 8), data); + } + nvkm_done(join->inst); + return ret; +} + +static const struct nvkm_vmm_func +nv50_vmm = { + .join = nv50_vmm_join, + .part = nv50_vmm_part, + .valid = nv50_vmm_valid, + .flush = nv50_vmm_flush, + .page_block = 1 << 29, + .page = { + { 16, &nv50_vmm_desc_16[0], NVKM_VMM_PAGE_xVxC }, + { 12, &nv50_vmm_desc_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +nv50_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&nv50_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.h index 333e0c01545a..011a67fe4a8b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVMXM_MXMS_H__ #define __NVMXM_MXMS_H__ #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/priv.h index 7d970157aed1..6767c2279e7c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_MXM_PRIV_H__ #define __NVKM_MXM_PRIV_H__ #define nvkm_mxm(p) container_of((p), struct nvkm_mxm, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/agp.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/agp.h index df2dd08363ad..edb7f00f0de5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/agp.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/agp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #include "priv.h" #if defined(CONFIG_AGP) || (defined(CONFIG_AGP_MODULE) && defined(MODULE)) #ifndef __NVKM_PCI_AGP_H__ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index a4cb82495cee..b1b1f3626b96 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -87,7 +87,7 @@ nvkm_pci_fini(struct nvkm_subdev *subdev, bool suspend) if (pci->irq >= 0) { free_irq(pci->irq, pci); pci->irq = -1; - }; + } if (pci->agp.bridge) nvkm_agp_fini(pci); diff --git 
a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h index 86921ec962d6..c17f6063c9ea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PCI_PRIV_H__ #define __NVKM_PCI_PRIV_H__ #define nvkm_pci(p) container_of((p), struct nvkm_pci, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h index 0bcf0b307a61..53d01fb00a8b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gf100_pmu_data[] = { /* 0x0000: proc_kern */ 0x52544e49, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h index fe8905666c67..e1e981966c2d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gf119_pmu_data[] = { /* 0x0000: proc_kern */ 0x52544e49, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h index 9cf4e6fc724e..c4edbc79e41a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gk208_pmu_data[] = { /* 0x0000: proc_kern */ 0x52544e49, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h index 5d692425b190..6a2572e8945a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ static uint32_t gt215_pmu_data[] = { /* 0x0000: proc_kern */ 0x52544e49, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/os.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/os.h index c8b06cb77e72..30d9480b9be5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/os.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/os.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PWR_OS_H__ #define __NVKM_PWR_OS_H__ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c index e6f74168238c..11b28b086a06 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #ifndef __NVKM_PMU_MEMX_H__ #define __NVKM_PMU_MEMX_H__ #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index a4c48a10cd47..e9c6f9725afe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_PMU_PRIV_H__ #define __NVKM_PMU_PRIV_H__ #define nvkm_pmu(p) container_of((p), struct nvkm_pmu, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c index 73ca1203281d..5e91b3f90065 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c +++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c @@ -39,7 +39,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, { struct gm200_secboot *gsb = gm200_secboot(sb); struct nvkm_subdev *subdev = &gsb->base.subdev; - struct nvkm_vma vma; + struct nvkm_vma *vma = NULL; u32 start_address; int ret; @@ -48,12 +48,16 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, return ret; /* Map the HS firmware so the HS bootloader can see it */ - ret = nvkm_gpuobj_map(blob, gsb->vm, NV_MEM_ACCESS_RW, &vma); + ret = nvkm_vmm_get(gsb->vmm, 12, blob->size, &vma); if (ret) { nvkm_falcon_put(falcon, subdev); return ret; } + ret = nvkm_memory_map(blob, 0, gsb->vmm, vma, NULL, 0); + if (ret) + goto end; + /* Reset and set the falcon up */ ret = nvkm_falcon_reset(falcon); if (ret) @@ -61,7 +65,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, nvkm_falcon_bind_context(falcon, gsb->inst); /* Load the HS bootloader into the falcon's IMEM/DMEM */ - ret = sb->acr->func->load(sb->acr, falcon, blob, vma.offset); + ret = sb->acr->func->load(sb->acr, falcon, blob, vma->addr); if (ret < 0) goto end; @@ -91,7 +95,7 @@ end: nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, true); /* We don't need the ACR firmware anymore */ - nvkm_gpuobj_unmap(&vma); + nvkm_vmm_put(gsb->vmm, &vma); nvkm_falcon_put(falcon, subdev); return ret; @@ -102,37 +106,26 @@ gm200_secboot_oneinit(struct nvkm_secboot *sb) { struct gm200_secboot *gsb = gm200_secboot(sb); struct nvkm_device *device = sb->subdev.device; - struct nvkm_vm *vm; - const u64 vm_area_len = 600 * 1024; int ret; /* Allocate instance block and VM */ - ret = nvkm_gpuobj_new(device, 0x1000, 0, true, NULL, &gsb->inst); + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0, true, + &gsb->inst); if (ret) return ret; - ret = nvkm_gpuobj_new(device, 0x8000, 0, true, NULL, &gsb->pgd); + ret = nvkm_vmm_new(device, 0, 600 * 1024, NULL, 0, NULL, "acr", + &gsb->vmm); if (ret) return ret; - ret = nvkm_vm_new(device, 0, vm_area_len, 0, NULL, &vm); - if (ret) - return ret; - - atomic_inc(&vm->engref[NVKM_SUBDEV_PMU]); + atomic_inc(&gsb->vmm->engref[NVKM_SUBDEV_PMU]); + gsb->vmm->debug = gsb->base.subdev.debug; - ret = nvkm_vm_ref(vm, &gsb->vm, gsb->pgd); - nvkm_vm_ref(NULL, &vm, NULL); + ret = nvkm_vmm_join(gsb->vmm, gsb->inst); if (ret) return ret; - nvkm_kmap(gsb->inst); - nvkm_wo32(gsb->inst, 0x200, lower_32_bits(gsb->pgd->addr)); - nvkm_wo32(gsb->inst, 0x204, upper_32_bits(gsb->pgd->addr)); - nvkm_wo32(gsb->inst, 0x208, lower_32_bits(vm_area_len - 1)); - nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1)); - nvkm_done(gsb->inst); - if (sb->acr->func->oneinit) { ret = sb->acr->func->oneinit(sb->acr, sb); if (ret) @@ -160,9 +153,9 @@ gm200_secboot_dtor(struct nvkm_secboot *sb) sb->acr->func->dtor(sb->acr); - nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd); - nvkm_gpuobj_del(&gsb->pgd); - nvkm_gpuobj_del(&gsb->inst); + nvkm_vmm_part(gsb->vmm, gsb->inst); + nvkm_vmm_unref(&gsb->vmm); + nvkm_memory_unref(&gsb->inst); return gsb; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h index c8ab3d76bdef..62c5e162099a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h @@ -29,9 +29,8 @@ struct gm200_secboot { struct nvkm_secboot base; /* Instance block & address space used for HS FW execution */ - struct nvkm_gpuobj *inst; - struct nvkm_gpuobj *pgd; - struct nvkm_vm 
*vm; + struct nvkm_memory *inst; + struct nvkm_vmm *vmm; }; #define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c index ee989210725e..6f10b098676c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c @@ -183,7 +183,7 @@ acr_ls_sec2_post_run(const struct nvkm_acr *acr, const struct nvkm_secboot *sb) break; ); if (reg & BIT(4)) { - nvkm_debug(subdev, "applying workaround for start bug..."); + nvkm_debug(subdev, "applying workaround for start bug...\n"); nvkm_falcon_start(sb->boot_falcon); nvkm_msec(subdev->device, 1, if ((reg = nvkm_rd32(subdev->device, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h index 885e919a8720..d9091f029506 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h @@ -25,6 +25,7 @@ #include <subdev/secboot.h> #include <subdev/mmu.h> +struct nvkm_gpuobj; struct nvkm_secboot_func { int (*oneinit)(struct nvkm_secboot *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild index 2bafcc1d1818..7ba56b12badd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild @@ -12,3 +12,4 @@ nvkm-y += nvkm/subdev/therm/gt215.o nvkm-y += nvkm/subdev/therm/gf119.o nvkm-y += nvkm/subdev/therm/gm107.o nvkm-y += nvkm/subdev/therm/gm200.o +nvkm-y += nvkm/subdev/therm/gp100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c index 952a7cb0a59a..f27fc6d0d4c6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c @@ -341,7 +341,8 @@ nvkm_therm_init(struct nvkm_subdev *subdev) { struct nvkm_therm *therm = nvkm_therm(subdev); - therm->func->init(therm); + if (therm->func->init) + therm->func->init(therm); if (therm->suspend >= 0) { /* restore the pwm value only when on manual or auto mode */ diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.h b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c index 1df8154d0626..9f0dea3f61dc 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c @@ -1,5 +1,5 @@ /* - * Copyright 2015 Advanced Micro Devices, Inc. + * Copyright 2017 Rhys Kidd * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,26 +19,38 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
* + * Authors: Rhys Kidd */ -#ifndef POLARIS10_SMC_H -#define POLARIS10_SMC_H +#include "priv.h" -#include "smumgr.h" +static int +gp100_temp_get(struct nvkm_therm *therm) +{ + struct nvkm_device *device = therm->subdev.device; + struct nvkm_subdev *subdev = &therm->subdev; + u32 tsensor = nvkm_rd32(device, 0x020460); + u32 inttemp = (tsensor & 0x0001fff8); + /* device SHADOWed */ + if (tsensor & 0x40000000) + nvkm_trace(subdev, "reading temperature from SHADOWed sensor\n"); -int polaris10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr); -int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr); -int polaris10_init_smc_table(struct pp_hwmgr *hwmgr); -int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr); -int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr); -int polaris10_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type); -int polaris10_update_sclk_threshold(struct pp_hwmgr *hwmgr); -uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member); -uint32_t polaris10_get_mac_definition(uint32_t value); -int polaris10_process_firmware_header(struct pp_hwmgr *hwmgr); -bool polaris10_is_dpm_running(struct pp_hwmgr *hwmgr); -int polaris10_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, - struct amd_pp_profile *request); + /* device valid */ + if (tsensor & 0x20000000) + return (inttemp >> 8); + else + return -ENODEV; +} -#endif +static const struct nvkm_therm_func +gp100_therm = { + .temp_get = gp100_temp_get, + .program_alarms = nvkm_therm_program_alarms_polling, +}; +int +gp100_therm_new(struct nvkm_device *device, int index, + struct nvkm_therm **ptherm) +{ + return nvkm_therm_new_(&gp100_therm, device, index, ptherm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/priv.h index f820ca2aeda4..3b8878486faa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_TIMER_PRIV_H__ #define __NVKM_TIMER_PRIV_H__ #define nvkm_timer(p) container_of((p), struct nvkm_timer, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/regsnv04.h b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/regsnv04.h index 10bef85b485e..23d07f5f44d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/regsnv04.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/regsnv04.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #define NV04_PTIMER_INTR_0 0x009100 #define NV04_PTIMER_INTR_EN_0 0x009140 #define NV04_PTIMER_NUMERATOR 0x009200 diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h index adb3ed03d937..4f49b0acaa0e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_TOP_PRIV_H__ #define __NVKM_TOP_PRIV_H__ #define nvkm_top(p) container_of((p), struct nvkm_top, subdev) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h index 354bafe4b4e2..1a8ad560321b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVKM_VOLT_PRIV_H__ #define __NVKM_VOLT_PRIV_H__ #define nvkm_volt(p) container_of((p), struct nvkm_volt, subdev) diff --git a/drivers/gpu/drm/omapdrm/Makefile b/drivers/gpu/drm/omapdrm/Makefile index 
b391be7ecb6c..f115253115c5 100644 --- a/drivers/gpu/drm/omapdrm/Makefile +++ b/drivers/gpu/drm/omapdrm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) diff --git a/drivers/gpu/drm/omapdrm/displays/Makefile b/drivers/gpu/drm/omapdrm/displays/Makefile index 46baafb1a83e..d99659e1381b 100644 --- a/drivers/gpu/drm/omapdrm/displays/Makefile +++ b/drivers/gpu/drm/omapdrm/displays/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DRM_OMAP_ENCODER_OPA362) += encoder-opa362.o obj-$(CONFIG_DRM_OMAP_ENCODER_TFP410) += encoder-tfp410.o obj-$(CONFIG_DRM_OMAP_ENCODER_TPD12S015) += encoder-tpd12s015.o diff --git a/drivers/gpu/drm/omapdrm/dss/Makefile b/drivers/gpu/drm/omapdrm/dss/Makefile index 3c5644c3fc38..904101c5e79d 100644 --- a/drivers/gpu/drm/omapdrm/dss/Makefile +++ b/drivers/gpu/drm/omapdrm/dss/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_OMAP2_DSS_INIT) += omapdss-boot-init.o obj-$(CONFIG_OMAP_DSS_BASE) += omapdss-base.o diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi_common.c b/drivers/gpu/drm/omapdrm/dss/hdmi_common.c index 4dfb67fe5f6d..3ecde23ac604 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi_common.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define DSS_SUBSYS_NAME "HDMI" diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index 77ede3467324..2c4e1a93e05f 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DRM_PANEL_LVDS) += panel-lvds.o obj-$(CONFIG_DRM_PANEL_SIMPLE) += panel-simple.o obj-$(CONFIG_DRM_PANEL_INNOLUX_P079ZCA) += panel-innolux-p079zca.o diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index a3c96d2ea41c..b7c4709f7b34 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1008,6 +1008,10 @@ static const struct panel_desc hitachi_tx23d38vm0caa = { .width = 195, .height = 117, }, + .delay = { + .enable = 160, + .disable = 160, + }, }; static const struct drm_display_mode innolux_at043tn24_mode = { @@ -1018,8 +1022,8 @@ static const struct drm_display_mode innolux_at043tn24_mode = { .htotal = 480 + 2 + 41 + 2, .vdisplay = 272, .vsync_start = 272 + 2, - .vsync_end = 272 + 2 + 11, - .vtotal = 272 + 2 + 11 + 2, + .vsync_end = 272 + 2 + 10, + .vtotal = 272 + 2 + 10 + 2, .vrefresh = 60, .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, }; @@ -1033,6 +1037,7 @@ static const struct panel_desc innolux_at043tn24 = { .height = 54, }, .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE, }; static const struct drm_display_mode innolux_at070tn92_mode = { @@ -1832,6 +1837,30 @@ static const struct panel_desc tianma_tm070jdhg30 = { .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, }; +static const struct drm_display_mode toshiba_lt089ac29000_mode = { + .clock = 79500, + .hdisplay = 1280, + .hsync_start = 1280 + 192, + .hsync_end = 1280 + 192 + 128, + .htotal = 1280 + 192 + 128 + 64, + .vdisplay = 768, + .vsync_start = 768 + 20, + .vsync_end = 768 + 20 + 7, + .vtotal = 768 + 20 + 7 + 3, + .vrefresh = 60, +}; + +static const struct panel_desc toshiba_lt089ac29000 = { + .modes = &toshiba_lt089ac29000_mode, + .num_modes = 1, + .size = { + .width = 194, + .height = 116, + }, + .bus_format = 
MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE, +}; + static const struct drm_display_mode tpk_f07a_0102_mode = { .clock = 33260, .hdisplay = 800, @@ -2114,6 +2143,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "tianma,tm070jdhg30", .data = &tianma_tm070jdhg30, }, { + .compatible = "toshiba,lt089ac29000", + .data = &toshiba_lt089ac29000, + }, { .compatible = "tpk,f07a-0102", .data = &tpk_f07a_0102, }, { diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile index fce1453a93e1..9c5e8dba8ac6 100644 --- a/drivers/gpu/drm/pl111/Makefile +++ b/drivers/gpu/drm/pl111/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 pl111_drm-y += pl111_display.o \ pl111_versatile.o \ pl111_drv.o diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 74fc9362ecf9..c0fb52c6d4ca 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -219,7 +219,7 @@ int qxl_garbage_collect(struct qxl_device *qdev) union qxl_release_info *info; while (qxl_ring_pop(qdev->release_ring, &id)) { - QXL_INFO(qdev, "popped %lld\n", id); + DRM_DEBUG_DRIVER("popped %lld\n", id); while (id) { release = qxl_release_from_id_locked(qdev, id); if (release == NULL) @@ -229,8 +229,8 @@ int qxl_garbage_collect(struct qxl_device *qdev) next_id = info->next; qxl_release_unmap(qdev, release, info); - QXL_INFO(qdev, "popped %lld, next %lld\n", id, - next_id); + DRM_DEBUG_DRIVER("popped %lld, next %lld\n", id, + next_id); switch (release->type) { case QXL_RELEASE_DRAWABLE: @@ -248,7 +248,7 @@ int qxl_garbage_collect(struct qxl_device *qdev) } } - QXL_INFO(qdev, "%s: %d\n", __func__, i); + DRM_DEBUG_DRIVER("%d\n", i); return i; } @@ -381,17 +381,19 @@ void qxl_io_create_primary(struct qxl_device *qdev, { struct qxl_surface_create *create; - QXL_INFO(qdev, "%s: qdev %p, ram_header %p\n", __func__, qdev, - qdev->ram_header); + DRM_DEBUG_DRIVER("qdev %p, ram_header %p\n", qdev, qdev->ram_header); create = &qdev->ram_header->create_surface; create->format = bo->surf.format; create->width = bo->surf.width; create->height = bo->surf.height; create->stride = bo->surf.stride; - create->mem = qxl_bo_physical_address(qdev, bo, offset); + if (bo->shadow) { + create->mem = qxl_bo_physical_address(qdev, bo->shadow, offset); + } else { + create->mem = qxl_bo_physical_address(qdev, bo, offset); + } - QXL_INFO(qdev, "%s: mem = %llx, from %p\n", __func__, create->mem, - bo->kptr); + DRM_DEBUG_DRIVER("mem = %llx, from %p\n", create->mem, bo->kptr); create->flags = QXL_SURF_FLAG_KEEP_DATA; create->type = QXL_SURF_TYPE_PRIMARY; @@ -401,7 +403,7 @@ void qxl_io_create_primary(struct qxl_device *qdev, void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) { - QXL_INFO(qdev, "qxl_memslot_add %d\n", id); + DRM_DEBUG_DRIVER("qxl_memslot_add %d\n", id); wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC); } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index afbf50d0c08f..4756b3c9bf2c 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -305,7 +305,9 @@ static const struct drm_crtc_funcs qxl_crtc_funcs = { void qxl_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct qxl_framebuffer *qxl_fb = to_qxl_framebuffer(fb); + struct qxl_bo *bo = gem_to_qxl_bo(qxl_fb->obj); + WARN_ON(bo->shadow); drm_gem_object_unreference_unlocked(qxl_fb->obj); drm_framebuffer_cleanup(fb); kfree(qxl_fb); @@ -508,6 +510,7 @@ static void 
qxl_primary_atomic_update(struct drm_plane *plane, .x2 = qfb->base.width, .y2 = qfb->base.height }; + bool same_shadow = false; if (old_state->fb) { qfb_old = to_qxl_framebuffer(old_state->fb); @@ -519,15 +522,23 @@ static void qxl_primary_atomic_update(struct drm_plane *plane, if (bo == bo_old) return; + if (bo_old && bo_old->shadow && bo->shadow && + bo_old->shadow == bo->shadow) { + same_shadow = true; + } + if (bo_old && bo_old->is_primary) { - qxl_io_destroy_primary(qdev); + if (!same_shadow) + qxl_io_destroy_primary(qdev); bo_old->is_primary = false; } if (!bo->is_primary) { - qxl_io_create_primary(qdev, 0, bo); + if (!same_shadow) + qxl_io_create_primary(qdev, 0, bo); bo->is_primary = true; } + qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1); } @@ -679,8 +690,9 @@ static void qxl_cursor_atomic_disable(struct drm_plane *plane, static int qxl_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *new_state) { + struct qxl_device *qdev = plane->dev->dev_private; struct drm_gem_object *obj; - struct qxl_bo *user_bo; + struct qxl_bo *user_bo, *old_bo = NULL; int ret; if (!new_state->fb) @@ -689,6 +701,32 @@ static int qxl_plane_prepare_fb(struct drm_plane *plane, obj = to_qxl_framebuffer(new_state->fb)->obj; user_bo = gem_to_qxl_bo(obj); + if (plane->type == DRM_PLANE_TYPE_PRIMARY && + user_bo->is_dumb && !user_bo->shadow) { + if (plane->state->fb) { + obj = to_qxl_framebuffer(plane->state->fb)->obj; + old_bo = gem_to_qxl_bo(obj); + } + if (old_bo && old_bo->shadow && + user_bo->gem_base.size == old_bo->gem_base.size && + plane->state->crtc == new_state->crtc && + plane->state->crtc_w == new_state->crtc_w && + plane->state->crtc_h == new_state->crtc_h && + plane->state->src_x == new_state->src_x && + plane->state->src_y == new_state->src_y && + plane->state->src_w == new_state->src_w && + plane->state->src_h == new_state->src_h && + plane->state->rotation == new_state->rotation && + plane->state->zpos == new_state->zpos) { + drm_gem_object_get(&old_bo->shadow->gem_base); + user_bo->shadow = old_bo->shadow; + } else { + qxl_bo_create(qdev, user_bo->gem_base.size, + true, true, QXL_GEM_DOMAIN_VRAM, NULL, + &user_bo->shadow); + } + } + ret = qxl_bo_pin(user_bo, QXL_GEM_DOMAIN_CPU, NULL); if (ret) return ret; @@ -713,6 +751,11 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane, obj = to_qxl_framebuffer(old_state->fb)->obj; user_bo = gem_to_qxl_bo(obj); qxl_bo_unpin(user_bo); + + if (user_bo->shadow && !user_bo->is_primary) { + drm_gem_object_put_unlocked(&user_bo->shadow->gem_base); + user_bo->shadow = NULL; + } } static const uint32_t qxl_cursor_plane_formats[] = { diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 3397a1907336..08752c0ffb35 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -62,33 +62,9 @@ #define QXL_DEBUGFS_MAX_COMPONENTS 32 -extern int qxl_log_level; extern int qxl_num_crtc; extern int qxl_max_ioctls; -enum { - QXL_INFO_LEVEL = 1, - QXL_DEBUG_LEVEL = 2, -}; - -#define QXL_INFO(qdev, fmt, ...) do { \ - if (qxl_log_level >= QXL_INFO_LEVEL) { \ - qxl_io_log(qdev, fmt, __VA_ARGS__); \ - } \ - } while (0) -#define QXL_DEBUG(qdev, fmt, ...) do { \ - if (qxl_log_level >= QXL_DEBUG_LEVEL) { \ - qxl_io_log(qdev, fmt, __VA_ARGS__); \ - } \ - } while (0) -#define QXL_INFO_ONCE(qdev, fmt, ...) 
do { \ - static int done; \ - if (!done) { \ - done = 1; \ - QXL_INFO(qdev, fmt, __VA_ARGS__); \ - } \ - } while (0) - #define DRM_FILE_OFFSET 0x100000000ULL #define DRM_FILE_PAGE_OFFSET (DRM_FILE_OFFSET >> PAGE_SHIFT) @@ -113,6 +89,8 @@ struct qxl_bo { /* Constant after initialization */ struct drm_gem_object gem_base; bool is_primary; /* is this now a primary surface */ + bool is_dumb; + struct qxl_bo *shadow; bool hw_surf_alloc; struct qxl_surface surf; uint32_t surface_id; @@ -351,7 +329,7 @@ int qxl_check_idle(struct qxl_ring *ring); static inline void * qxl_fb_virtual_address(struct qxl_device *qdev, unsigned long physical) { - QXL_INFO(qdev, "not implemented (%lu)\n", physical); + DRM_DEBUG_DRIVER("not implemented (%lu)\n", physical); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c index 5e65d5d2d937..11085ab01374 100644 --- a/drivers/gpu/drm/qxl/qxl_dumb.c +++ b/drivers/gpu/drm/qxl/qxl_dumb.c @@ -63,6 +63,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv, &handle); if (r) return r; + qobj->is_dumb = true; args->pitch = pitch; args->handle = handle; return 0; diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 844c4a31ca13..23af3e352673 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -240,18 +240,15 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev, return ret; qbo = gem_to_qxl_bo(gobj); - QXL_INFO(qdev, "%s: %dx%d %d\n", __func__, mode_cmd.width, - mode_cmd.height, mode_cmd.pitches[0]); + DRM_DEBUG_DRIVER("%dx%d %d\n", mode_cmd.width, + mode_cmd.height, mode_cmd.pitches[0]); shadow = vmalloc(mode_cmd.pitches[0] * mode_cmd.height); /* TODO: what's the usual response to memory allocation errors? */ BUG_ON(!shadow); - QXL_INFO(qdev, - "surface0 at gpu offset %lld, mmap_offset %lld (virt %p, shadow %p)\n", - qxl_bo_gpu_offset(qbo), - qxl_bo_mmap_offset(qbo), - qbo->kptr, - shadow); + DRM_DEBUG_DRIVER("surface0 at gpu offset %lld, mmap_offset %lld (virt %p, shadow %p)\n", + qxl_bo_gpu_offset(qbo), qxl_bo_mmap_offset(qbo), + qbo->kptr, shadow); size = mode_cmd.pitches[0] * mode_cmd.height; info = drm_fb_helper_alloc_fbi(&qfbdev->helper); diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index e6ec845b5be0..a6da6fa6ad58 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -154,7 +154,7 @@ qxl_release_alloc(struct qxl_device *qdev, int type, return handle; } *ret = release; - QXL_INFO(qdev, "allocated release %d\n", handle); + DRM_DEBUG_DRIVER("allocated release %d\n", handle); release->id = handle; return handle; } @@ -179,8 +179,7 @@ void qxl_release_free(struct qxl_device *qdev, struct qxl_release *release) { - QXL_INFO(qdev, "release %d, type %d\n", release->id, - release->type); + DRM_DEBUG_DRIVER("release %d, type %d\n", release->id, release->type); if (release->surface_release_id) qxl_surface_id_dealloc(qdev, release->surface_release_id); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 7ecf8a4b9fe6..ab4823875311 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -136,8 +136,8 @@ int qxl_mmap(struct file *filp, struct vm_area_struct *vma) "filp->private_data->minor->dev->dev_private == NULL\n"); return -EINVAL; } - QXL_INFO(qdev, "%s: filp->private_data = 0x%p, vma->vm_pgoff = %lx\n", - __func__, filp->private_data, vma->vm_pgoff); + DRM_DEBUG_DRIVER("filp->private_data = 0x%p, vma->vm_pgoff = %lx\n", + filp->private_data, vma->vm_pgoff); r = 
ttm_bo_mmap(filp, vma, &qdev->mman.bdev); if (unlikely(r != 0)) diff --git a/drivers/gpu/drm/r128/r128_drv.h b/drivers/gpu/drm/r128/r128_drv.h index 09143b840482..2de40d276116 100644 --- a/drivers/gpu/drm/r128/r128_drv.h +++ b/drivers/gpu/drm/r128/r128_drv.h @@ -147,6 +147,10 @@ extern int r128_engine_reset(struct drm_device *dev, void *data, struct drm_file extern int r128_fullscreen(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int r128_cce_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int r128_cce_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int r128_cce_depth(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int r128_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv); + extern void r128_freelist_reset(struct drm_device *dev); extern int r128_wait_ring(drm_r128_private_t *dev_priv, int n); diff --git a/drivers/gpu/drm/r128/r128_ioc32.c b/drivers/gpu/drm/r128/r128_ioc32.c index 663f38c63ba6..6589f9e0310e 100644 --- a/drivers/gpu/drm/r128/r128_ioc32.c +++ b/drivers/gpu/drm/r128/r128_ioc32.c @@ -63,39 +63,36 @@ static int compat_r128_init(struct file *file, unsigned int cmd, unsigned long arg) { drm_r128_init32_t init32; - drm_r128_init_t __user *init; + drm_r128_init_t init; if (copy_from_user(&init32, (void __user *)arg, sizeof(init32))) return -EFAULT; - init = compat_alloc_user_space(sizeof(*init)); - if (!access_ok(VERIFY_WRITE, init, sizeof(*init)) - || __put_user(init32.func, &init->func) - || __put_user(init32.sarea_priv_offset, &init->sarea_priv_offset) - || __put_user(init32.is_pci, &init->is_pci) - || __put_user(init32.cce_mode, &init->cce_mode) - || __put_user(init32.cce_secure, &init->cce_secure) - || __put_user(init32.ring_size, &init->ring_size) - || __put_user(init32.usec_timeout, &init->usec_timeout) - || __put_user(init32.fb_bpp, &init->fb_bpp) - || __put_user(init32.front_offset, &init->front_offset) - || __put_user(init32.front_pitch, &init->front_pitch) - || __put_user(init32.back_offset, &init->back_offset) - || __put_user(init32.back_pitch, &init->back_pitch) - || __put_user(init32.depth_bpp, &init->depth_bpp) - || __put_user(init32.depth_offset, &init->depth_offset) - || __put_user(init32.depth_pitch, &init->depth_pitch) - || __put_user(init32.span_offset, &init->span_offset) - || __put_user(init32.fb_offset, &init->fb_offset) - || __put_user(init32.mmio_offset, &init->mmio_offset) - || __put_user(init32.ring_offset, &init->ring_offset) - || __put_user(init32.ring_rptr_offset, &init->ring_rptr_offset) - || __put_user(init32.buffers_offset, &init->buffers_offset) - || __put_user(init32.agp_textures_offset, - &init->agp_textures_offset)) - return -EFAULT; - - return drm_ioctl(file, DRM_IOCTL_R128_INIT, (unsigned long)init); + init.func = init32.func; + init.sarea_priv_offset = init32.sarea_priv_offset; + init.is_pci = init32.is_pci; + init.cce_mode = init32.cce_mode; + init.cce_secure = init32.cce_secure; + init.ring_size = init32.ring_size; + init.usec_timeout = init32.usec_timeout; + init.fb_bpp = init32.fb_bpp; + init.front_offset = init32.front_offset; + init.front_pitch = init32.front_pitch; + init.back_offset = init32.back_offset; + init.back_pitch = init32.back_pitch; + init.depth_bpp = init32.depth_bpp; + init.depth_offset = init32.depth_offset; + init.depth_pitch = init32.depth_pitch; + init.span_offset = init32.span_offset; + init.fb_offset = init32.fb_offset; + init.mmio_offset = init32.mmio_offset; + init.ring_offset 
= init32.ring_offset; + init.ring_rptr_offset = init32.ring_rptr_offset; + init.buffers_offset = init32.buffers_offset; + init.agp_textures_offset = init32.agp_textures_offset; + + return drm_ioctl_kernel(file, r128_cce_init, &init, + DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY); } typedef struct drm_r128_depth32 { @@ -111,25 +108,19 @@ static int compat_r128_depth(struct file *file, unsigned int cmd, unsigned long arg) { drm_r128_depth32_t depth32; - drm_r128_depth_t __user *depth; + drm_r128_depth_t depth; if (copy_from_user(&depth32, (void __user *)arg, sizeof(depth32))) return -EFAULT; - depth = compat_alloc_user_space(sizeof(*depth)); - if (!access_ok(VERIFY_WRITE, depth, sizeof(*depth)) - || __put_user(depth32.func, &depth->func) - || __put_user(depth32.n, &depth->n) - || __put_user((int __user *)(unsigned long)depth32.x, &depth->x) - || __put_user((int __user *)(unsigned long)depth32.y, &depth->y) - || __put_user((unsigned int __user *)(unsigned long)depth32.buffer, - &depth->buffer) - || __put_user((unsigned char __user *)(unsigned long)depth32.mask, - &depth->mask)) - return -EFAULT; - - return drm_ioctl(file, DRM_IOCTL_R128_DEPTH, (unsigned long)depth); + depth.func = depth32.func; + depth.n = depth32.n; + depth.x = compat_ptr(depth32.x); + depth.y = compat_ptr(depth32.y); + depth.buffer = compat_ptr(depth32.buffer); + depth.mask = compat_ptr(depth32.mask); + return drm_ioctl_kernel(file, r128_cce_depth, &depth, DRM_AUTH); } typedef struct drm_r128_stipple32 { @@ -140,18 +131,14 @@ static int compat_r128_stipple(struct file *file, unsigned int cmd, unsigned long arg) { drm_r128_stipple32_t stipple32; - drm_r128_stipple_t __user *stipple; + drm_r128_stipple_t stipple; if (copy_from_user(&stipple32, (void __user *)arg, sizeof(stipple32))) return -EFAULT; - stipple = compat_alloc_user_space(sizeof(*stipple)); - if (!access_ok(VERIFY_WRITE, stipple, sizeof(*stipple)) - || __put_user((unsigned int __user *)(unsigned long)stipple32.mask, - &stipple->mask)) - return -EFAULT; + stipple.mask = compat_ptr(stipple32.mask); - return drm_ioctl(file, DRM_IOCTL_R128_STIPPLE, (unsigned long)stipple); + return drm_ioctl_kernel(file, r128_cce_stipple, &stipple, DRM_AUTH); } typedef struct drm_r128_getparam32 { @@ -163,19 +150,15 @@ static int compat_r128_getparam(struct file *file, unsigned int cmd, unsigned long arg) { drm_r128_getparam32_t getparam32; - drm_r128_getparam_t __user *getparam; + drm_r128_getparam_t getparam; if (copy_from_user(&getparam32, (void __user *)arg, sizeof(getparam32))) return -EFAULT; - getparam = compat_alloc_user_space(sizeof(*getparam)); - if (!access_ok(VERIFY_WRITE, getparam, sizeof(*getparam)) - || __put_user(getparam32.param, &getparam->param) - || __put_user((void __user *)(unsigned long)getparam32.value, - &getparam->value)) - return -EFAULT; + getparam.param = getparam32.param; + getparam.value = compat_ptr(getparam32.value); - return drm_ioctl(file, DRM_IOCTL_R128_GETPARAM, (unsigned long)getparam); + return drm_ioctl_kernel(file, r128_getparam, &getparam, DRM_AUTH); } drm_ioctl_compat_t *r128_compat_ioctls[] = { diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c index 8fd2d9f58f77..8fdc56c1c953 100644 --- a/drivers/gpu/drm/r128/r128_state.c +++ b/drivers/gpu/drm/r128/r128_state.c @@ -1460,7 +1460,7 @@ static int r128_cce_blit(struct drm_device *dev, void *data, struct drm_file *fi return ret; } -static int r128_cce_depth(struct drm_device *dev, void *data, struct drm_file *file_priv) +int r128_cce_depth(struct drm_device *dev, void 
*data, struct drm_file *file_priv) { drm_r128_private_t *dev_priv = dev->dev_private; drm_r128_depth_t *depth = data; @@ -1492,7 +1492,7 @@ static int r128_cce_depth(struct drm_device *dev, void *data, struct drm_file *f return ret; } -static int r128_cce_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv) +int r128_cce_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_r128_private_t *dev_priv = dev->dev_private; drm_r128_stipple_t *stipple = data; @@ -1582,7 +1582,7 @@ static int r128_cce_indirect(struct drm_device *dev, void *data, struct drm_file return 0; } -static int r128_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) +int r128_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_r128_private_t *dev_priv = dev->dev_private; drm_r128_getparam_t *param = data; diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index be16c6390216..92ccd7aed0d4 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. @@ -102,8 +103,7 @@ radeon-y += \ radeon-y += \ radeon_vce.o \ vce_v1_0.o \ - vce_v2_0.o \ - radeon_kfd.o + vce_v2_0.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index a904c80c30e6..3e798593e042 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -45,34 +45,32 @@ static char *pre_emph_names[] = { /***** radeon AUX functions *****/ -/* Atom needs data in little endian format - * so swap as appropriate when copying data to - * or from atom. Note that atom operates on - * dw units. +/* Atom needs data in little endian format so swap as appropriate when copying + * data to or from atom. Note that atom operates on dw units. + * + * Use to_le=true when sending data to atom and provide at least + * ALIGN(num_bytes,4) bytes in the dst buffer. + * + * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4) + * byes in the src buffer. 
*/ void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) { #ifdef __BIG_ENDIAN - u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ - u32 *dst32, *src32; + u32 src_tmp[5], dst_tmp[5]; int i; + u8 align_num_bytes = ALIGN(num_bytes, 4); - memcpy(src_tmp, src, num_bytes); - src32 = (u32 *)src_tmp; - dst32 = (u32 *)dst_tmp; if (to_le) { - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = cpu_to_le32(src32[i]); - memcpy(dst, dst_tmp, num_bytes); + memcpy(src_tmp, src, num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = cpu_to_le32(src_tmp[i]); + memcpy(dst, dst_tmp, align_num_bytes); } else { - u8 dws = num_bytes & ~3; - for (i = 0; i < ((num_bytes + 3) / 4); i++) - dst32[i] = le32_to_cpu(src32[i]); - memcpy(dst, dst_tmp, dws); - if (num_bytes % 4) { - for (i = 0; i < (num_bytes % 4); i++) - dst[dws+i] = dst_tmp[dws+i]; - } + memcpy(src_tmp, src, align_num_bytes); + for (i = 0; i < align_num_bytes / 4; i++) + dst_tmp[i] = le32_to_cpu(src_tmp[i]); + memcpy(dst, dst_tmp, num_bytes); } #else memcpy(dst, src, num_bytes); diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 3cb6c55b268d..898f9a078830 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -33,7 +33,6 @@ #include "cik_blit_shaders.h" #include "radeon_ucode.h" #include "clearstate_ci.h" -#include "radeon_kfd.h" #define SH_MEM_CONFIG_GFX_DEFAULT \ ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) @@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev) /* * number of VMs * VMID 0 is reserved for System - * radeon graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 + * radeon graphics/compute will use VMIDs 1-15 */ - rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; + rdev->vm_manager.nvm = 16; /* base offset of vram pages */ if (rdev->flags & RADEON_IS_IGP) { u64 tmp = RREG32(MC_VM_FB_OFFSET); @@ -7589,9 +7587,6 @@ restart_ih: /* wptr/rptr are in bytes! */ ring_index = rptr / 4; - radeon_kfd_interrupt(rdev, - (const void *) &rdev->ih.ring[ring_index]); - src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; @@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev) if (r) return r; - r = radeon_kfd_resume(rdev); - if (r) - return r; - return 0; } @@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev) */ int cik_suspend(struct radeon_device *rdev) { - radeon_kfd_suspend(rdev); radeon_pm_suspend(rdev); radeon_audio_fini(rdev); radeon_vm_manager_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index e21015475ed5..cda16fcd43bb 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -30,8 +30,6 @@ #define CIK_RB_BITMAP_WIDTH_PER_SH 2 #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 -#define RADEON_NUM_OF_VMIDS 8 - /* DIDT IND registers */ #define DIDT_SQ_CTRL0 0x0 # define DIDT_CTRL_EN (1 << 0) diff --git a/drivers/gpu/drm/radeon/mkregtable.c b/drivers/gpu/drm/radeon/mkregtable.c index b928c17bdeed..c21d8fa591ef 100644 --- a/drivers/gpu/drm/radeon/mkregtable.c +++ b/drivers/gpu/drm/radeon/mkregtable.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* utility to create the register check tables * this includes inlined list.h safe for userspace. 
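The rewritten radeon_atom_copy_swap() above has an asymmetric buffer contract: when sending to atom (to_le=true) the destination must hold ALIGN(num_bytes, 4) bytes, and when receiving (to_le=false) the source must, because on big-endian hosts the copy is performed in whole 32-bit words. A small userspace model of that contract follows; copy_swap_model() and swap32() are invented names standing in for the kernel helpers (cpu_to_le32()/le32_to_cpu()), so treat this as a sketch of the behaviour rather than the driver routine.

    /* Userspace model of the big-endian branch of the copy helper. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define ALIGN4(n)  (((n) + 3u) & ~3u)

    static uint32_t swap32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0x0000ff00) |
               ((v << 8) & 0x00ff0000) | (v << 24);
    }

    /* to_le != 0 models "CPU -> atom": read num_bytes, write ALIGN4 bytes.
     * to_le == 0 models "atom -> CPU": read ALIGN4 bytes, write num_bytes. */
    static void copy_swap_model(uint8_t *dst, const uint8_t *src,
                                uint8_t num_bytes, int to_le)
    {
        uint32_t src_tmp[5] = { 0 }, dst_tmp[5] = { 0 };
        uint8_t align = ALIGN4(num_bytes);
        unsigned int i;

        memcpy(src_tmp, src, to_le ? num_bytes : align);
        for (i = 0; i < align / 4u; i++)
            dst_tmp[i] = swap32(src_tmp[i]);
        memcpy(dst, dst_tmp, to_le ? align : num_bytes);
    }

    int main(void)
    {
        uint8_t in[8]  = { 1, 2, 3, 4, 5, 6, 7, 8 };
        uint8_t out[8] = { 0 };  /* 6 payload bytes need ALIGN4(6) = 8 here */
        unsigned int i;

        copy_swap_model(out, in, 6, 1);
        for (i = 0; i < 8; i++)
            printf("%02x ", out[i]);
        printf("\n");
        return 0;
    }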
* diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h index eb40888bdfcc..ad16a925f8d5 100644 --- a/drivers/gpu/drm/radeon/r100_track.h +++ b/drivers/gpu/drm/radeon/r100_track.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #define R100_TRACK_MAX_TEXTURE 3 #define R200_TRACK_MAX_TEXTURE 6 diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8cbaeec090c9..a8e546569858 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2456,9 +2456,6 @@ struct radeon_device { u64 vram_pin_size; u64 gart_pin_size; - /* amdkfd interface */ - struct kfd_dev *kfd; - struct mutex mn_lock; DECLARE_HASHTABLE(mn_hash, 7); }; diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index ebdf1b859cb6..2917ea1b667e 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <drm/drmP.h> #include <drm/drm_dp_mst_helper.h> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index f4becad0a78c..31dd04f6baa1 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -43,7 +43,6 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_crtc_helper.h> -#include "radeon_kfd.h" /* * KMS wrapper. @@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev, { int ret; - /* - * Initialize amdkfd before starting radeon. If it was not loaded yet, - * defer radeon probing - */ - ret = radeon_kfd_init(); - if (ret == -EPROBE_DEFER) - return ret; - if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; @@ -645,7 +636,6 @@ static int __init radeon_init(void) static void __exit radeon_exit(void) { - radeon_kfd_fini(); pci_unregister_driver(pdriver); radeon_unregister_atpx_handler(); } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 3386452bd2f0..cf3deb283da5 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -451,7 +451,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, else r = 0; - cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); + cur_placement = READ_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_put_unlocked(gobj); return r; @@ -481,7 +481,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ - cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); + cur_placement = READ_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c deleted file mode 100644 index 385b4d76956d..000000000000 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ /dev/null @@ -1,901 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> -#include <drm/drmP.h> -#include "radeon.h" -#include "cikd.h" -#include "cik_reg.h" -#include "radeon_kfd.h" -#include "radeon_ucode.h" -#include <linux/firmware.h> -#include "cik_structs.h" - -#define CIK_PIPE_PER_MEC (4) - -static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { - TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, - TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, - TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, - TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL -}; - -struct kgd_mem { - struct radeon_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; -}; - - -static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr); - -static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); - -static uint64_t get_vmem_size(struct kgd_dev *kgd); -static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); - -static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); - -static int alloc_pasid(unsigned int bits); -static void free_pasid(unsigned int pasid); - -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); - -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, - uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); - -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); - -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); - -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, - unsigned int timeout, uint32_t pipe_id, - uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int timeout); 
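The forward declarations above, together with the kfd2kgd_calls table defined just below, formed the function-pointer interface that radeon exported to the amdkfd driver; this patch deletes the file entirely (the equivalent glue continues to live on the amdgpu side). For readers unfamiliar with the pattern, a minimal hypothetical sketch of such an ops table follows; the names are invented for illustration and are not taken from the kernel.

#include <stdint.h>
#include <stddef.h>

struct gpu_dev;                                 /* opaque to the consumer */

/* The producer driver fills in callbacks and hands them, plus an opaque
 * device handle, to a consumer that never sees the producer's types. */
struct gpu_compute_ops {
        uint64_t (*get_vram_size)(struct gpu_dev *gpu);
        int      (*load_queue)(struct gpu_dev *gpu, uint32_t pipe, uint32_t queue);
};

static uint64_t mygpu_get_vram_size(struct gpu_dev *gpu)
{
        (void)gpu;
        return 1ULL << 32;
}

static int mygpu_load_queue(struct gpu_dev *gpu, uint32_t pipe, uint32_t queue)
{
        (void)gpu; (void)pipe; (void)queue;
        return 0;
}

static const struct gpu_compute_ops mygpu_ops = {
        .get_vram_size = mygpu_get_vram_size,
        .load_queue    = mygpu_load_queue,
};

/* Consumer side: only the handle and the ops pointer are used. */
static int consumer_start(struct gpu_dev *gpu, const struct gpu_compute_ops *ops)
{
        if (ops->get_vram_size(gpu) == 0)
                return -1;
        return ops->load_queue(gpu, 0, 0);
}

int main(void)
{
        return consumer_start(NULL, &mygpu_ops);
}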
-static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); - -static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = alloc_pasid, - .free_pasid = free_pasid, - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version -}; - -static const struct kgd2kfd_calls *kgd2kfd; - -int radeon_kfd_init(void) -{ - int ret; - -#if defined(CONFIG_HSA_AMD_MODULE) - int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); - - kgd2kfd_init_p = symbol_request(kgd2kfd_init); - - if (kgd2kfd_init_p == NULL) - return -ENOENT; - - ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd); - if (ret) { - symbol_put(kgd2kfd_init); - kgd2kfd = NULL; - } - -#elif defined(CONFIG_HSA_AMD) - ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); - if (ret) - kgd2kfd = NULL; - -#else - ret = -ENOENT; -#endif - - return ret; -} - -void radeon_kfd_fini(void) -{ - if (kgd2kfd) { - kgd2kfd->exit(); - symbol_put(kgd2kfd_init); - } -} - -void radeon_kfd_device_probe(struct radeon_device *rdev) -{ - if (kgd2kfd) - rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, - rdev->pdev, &kfd2kgd); -} - -void radeon_kfd_device_init(struct radeon_device *rdev) -{ - int i, queue, pipe, mec; - - if (rdev->kfd) { - struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = 0xFF00, - .num_pipe_per_mec = 4, - .num_queue_per_pipe = 8 - }; - - bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES); - - for (i = 0; i < KGD_MAX_QUEUES; ++i) { - queue = i % gpu_resources.num_queue_per_pipe; - pipe = (i / gpu_resources.num_queue_per_pipe) - % gpu_resources.num_pipe_per_mec; - mec = (i / gpu_resources.num_queue_per_pipe) - / gpu_resources.num_pipe_per_mec; - - if (mec == 0 && pipe > 0) - set_bit(i, gpu_resources.queue_bitmap); - } - - radeon_doorbell_get_kfd_info(rdev, - &gpu_resources.doorbell_physical_address, - &gpu_resources.doorbell_aperture_size, - 
&gpu_resources.doorbell_start_offset); - - kgd2kfd->device_init(rdev->kfd, &gpu_resources); - } -} - -void radeon_kfd_device_fini(struct radeon_device *rdev) -{ - if (rdev->kfd) { - kgd2kfd->device_exit(rdev->kfd); - rdev->kfd = NULL; - } -} - -void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry) -{ - if (rdev->kfd) - kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); -} - -void radeon_kfd_suspend(struct radeon_device *rdev) -{ - if (rdev->kfd) - kgd2kfd->suspend(rdev->kfd); -} - -int radeon_kfd_resume(struct radeon_device *rdev) -{ - int r = 0; - - if (rdev->kfd) - r = kgd2kfd->resume(rdev->kfd); - - return r; -} - -static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - struct kgd_mem **mem = (struct kgd_mem **) mem_obj; - int r; - - BUG_ON(kgd == NULL); - BUG_ON(gpu_addr == NULL); - BUG_ON(cpu_ptr == NULL); - - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if ((*mem) == NULL) - return -ENOMEM; - - r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, - RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo); - if (r) { - dev_err(rdev->dev, - "failed to allocate BO for amdkfd (%d)\n", r); - return r; - } - - /* map the buffer */ - r = radeon_bo_reserve((*mem)->bo, true); - if (r) { - dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); - goto allocate_mem_reserve_bo_failed; - } - - r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT, - &(*mem)->gpu_addr); - if (r) { - dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); - goto allocate_mem_pin_bo_failed; - } - *gpu_addr = (*mem)->gpu_addr; - - r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); - if (r) { - dev_err(rdev->dev, - "(%d) failed to map bo to kernel for amdkfd\n", r); - goto allocate_mem_kmap_bo_failed; - } - *cpu_ptr = (*mem)->cpu_ptr; - - radeon_bo_unreserve((*mem)->bo); - - return 0; - -allocate_mem_kmap_bo_failed: - radeon_bo_unpin((*mem)->bo); -allocate_mem_pin_bo_failed: - radeon_bo_unreserve((*mem)->bo); -allocate_mem_reserve_bo_failed: - radeon_bo_unref(&(*mem)->bo); - - return r; -} - -static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) -{ - struct kgd_mem *mem = (struct kgd_mem *) mem_obj; - - BUG_ON(mem == NULL); - - radeon_bo_reserve(mem->bo, true); - radeon_bo_kunmap(mem->bo); - radeon_bo_unpin(mem->bo); - radeon_bo_unreserve(mem->bo); - radeon_bo_unref(&(mem->bo)); - kfree(mem); -} - -static uint64_t get_vmem_size(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - BUG_ON(kgd == NULL); - - return rdev->mc.real_vram_size; -} - -static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - return rdev->asic->get_gpu_clock_counter(rdev); -} - -static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - /* The sclk is in quantas of 10kHz */ - return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; -} - -/* - * PASID manager - */ -static DEFINE_IDA(pasid_ida); - -static int alloc_pasid(unsigned int bits) -{ - int pasid = -EINVAL; - - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_simple_get(&pasid_ida, - 1U << (bits - 1), 1U << bits, - GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } - - return pasid; -} - -static void free_pasid(unsigned int pasid) -{ - ida_simple_remove(&pasid_ida, pasid); -} - -static inline struct radeon_device 
*get_radeon_device(struct kgd_dev *kgd) -{ - return (struct radeon_device *)kgd; -} - -static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - - writel(value, (void __iomem *)(rdev->rmmio + offset)); -} - -static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - - return readl((void __iomem *)(rdev->rmmio + offset)); -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, - uint32_t queue, uint32_t vmid) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); - - mutex_lock(&rdev->srbm_mutex); - write_register(kgd, SRBM_GFX_CNTL, value); -} - -static void unlock_srbm(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - - write_register(kgd, SRBM_GFX_CNTL, 0); - mutex_unlock(&rdev->srbm_mutex); -} - -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t queue_id) -{ - uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); - - lock_srbm(kgd, mec, pipe, queue_id, 0); -} - -static void release_queue(struct kgd_dev *kgd) -{ - unlock_srbm(kgd); -} - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, - uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) -{ - lock_srbm(kgd, 0, 0, 0, vmid); - - write_register(kgd, SH_MEM_CONFIG, sh_mem_config); - write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base); - write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit); - write_register(kgd, SH_MEM_BASES, sh_mem_bases); - - unlock_srbm(kgd); -} - -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid) -{ - /* - * We have to assume that there is no outstanding mapping. - * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 - * because a mapping is in progress or because a mapping finished and - * the SW cleared it. - * So the protocol is to always wait & clear. - */ - uint32_t pasid_mapping = (pasid == 0) ? 
0 : (uint32_t)pasid | - ATC_VMID_PASID_MAPPING_VALID_MASK; - - write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), - pasid_mapping); - - while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & - (1U << vmid))) - cpu_relax(); - write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); - - /* Mapping vmid to pasid also for IH block */ - write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), - pasid_mapping); - - return 0; -} - -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* nothing to do here */ - return 0; -} - -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) -{ - uint32_t mec; - uint32_t pipe; - - mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; - pipe = (pipe_id % CIK_PIPE_PER_MEC); - - lock_srbm(kgd, mec, pipe, 0, 0); - - write_register(kgd, CPC_INT_CNTL, - TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE); - - unlock_srbm(kgd); - - return 0; -} - -static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) -{ - uint32_t retval; - - retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + - m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - - pr_debug("kfd: sdma base address: 0x%x\n", retval); - - return retval; -} - -static inline struct cik_mqd *get_mqd(void *mqd) -{ - return (struct cik_mqd *)mqd; -} - -static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) -{ - return (struct cik_sdma_rlc_registers *)mqd; -} - -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) -{ - uint32_t wptr_shadow, is_wptr_shadow_valid; - struct cik_mqd *m; - - m = get_mqd(mqd); - - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); - - acquire_queue(kgd, pipe_id, queue_id); - write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); - write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); - write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control); - - write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); - write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); - write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); - - write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control); - write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); - write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); - - write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); - - write_register(kgd, CP_HQD_PERSISTENT_STATE, - m->cp_hqd_persistent_state); - write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); - write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type); - - write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO, - m->cp_hqd_atomic0_preop_lo); - - write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI, - m->cp_hqd_atomic0_preop_hi); - - write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO, - m->cp_hqd_atomic1_preop_lo); - - write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI, - m->cp_hqd_atomic1_preop_hi); - - write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR, - m->cp_hqd_pq_rptr_report_addr_lo); - - write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, - m->cp_hqd_pq_rptr_report_addr_hi); - - write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); - - write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR, - m->cp_hqd_pq_wptr_poll_addr_lo); - - write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI, - m->cp_hqd_pq_wptr_poll_addr_hi); - - write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, - m->cp_hqd_pq_doorbell_control); - - 
write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid); - - write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum); - - write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); - write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); - - write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); - - if (is_wptr_shadow_valid) - write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow); - - write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active); - release_queue(kgd); - - return 0; -} - -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) -{ - struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; - - m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_BASE_HI, - m->sdma_rlc_rb_base_hi); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO, - m->sdma_rlc_rb_rptr_addr_lo); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI, - m->sdma_rlc_rb_rptr_addr_hi); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - - write_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_CNTL, - m->sdma_rlc_rb_cntl); - - return 0; -} - -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) -{ - uint32_t act; - bool retval = false; - uint32_t low, high; - - acquire_queue(kgd, pipe_id, queue_id); - act = read_register(kgd, CP_HQD_ACTIVE); - if (act) { - low = lower_32_bits(queue_address >> 8); - high = upper_32_bits(queue_address >> 8); - - if (low == read_register(kgd, CP_HQD_PQ_BASE) && - high == read_register(kgd, CP_HQD_PQ_BASE_HI)) - retval = true; - } - release_queue(kgd); - return retval; -} - -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) -{ - struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; - uint32_t sdma_rlc_rb_cntl; - - m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - - sdma_rlc_rb_cntl = read_register(kgd, - sdma_base_addr + SDMA0_RLC0_RB_CNTL); - - if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE) - return true; - - return false; -} - -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, - unsigned int timeout, uint32_t pipe_id, - uint32_t queue_id) -{ - uint32_t temp; - - acquire_queue(kgd, pipe_id, queue_id); - write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0); - - write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type); - - while (true) { - temp = read_register(kgd, CP_HQD_ACTIVE); - if (temp & 0x1) - break; - if (timeout == 0) { - pr_err("kfd: cp queue preemption time out (%dms)\n", - temp); - release_queue(kgd); - return -ETIME; - } - msleep(20); - timeout -= 20; - } - - release_queue(kgd); - return 0; -} - -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int timeout) -{ - struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; - uint32_t temp; - - m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - - temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL); - temp = temp & ~SDMA_RB_ENABLE; - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp); - - while (true) { - temp = read_register(kgd, sdma_base_addr + - SDMA0_RLC0_CONTEXT_STATUS); - if (temp & SDMA_RLC_IDLE) - break; - if (timeout == 0) - return -ETIME; - msleep(20); - timeout -= 20; - } - - write_register(kgd, 
sdma_base_addr + SDMA0_RLC0_DOORBELL, 0); - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0); - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0); - write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0); - - return 0; -} - -static int kgd_address_watch_disable(struct kgd_dev *kgd) -{ - union TCP_WATCH_CNTL_BITS cntl; - unsigned int i; - - cntl.u32All = 0; - - cntl.bitfields.valid = 0; - cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; - cntl.bitfields.atc = 1; - - /* Turning off this address until we set all the registers */ - for (i = 0; i < MAX_WATCH_ADDRESSES; i++) - write_register(kgd, - watchRegs[i * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], - cntl.u32All); - - return 0; -} - -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - union TCP_WATCH_CNTL_BITS cntl; - - cntl.u32All = cntl_val; - - /* Turning off this watch point until we set all the registers */ - cntl.bitfields.valid = 0; - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], - cntl.u32All); - - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_HI], - addr_hi); - - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_LO], - addr_lo); - - /* Enable the watch point */ - cntl.bitfields.valid = 1; - - write_register(kgd, - watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], - cntl.u32All); - - return 0; -} - -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd) -{ - struct radeon_device *rdev = get_radeon_device(kgd); - uint32_t data; - - mutex_lock(&rdev->grbm_idx_mutex); - - write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); - write_register(kgd, SQ_CMD, sq_cmd); - - /* Restore the GRBM_GFX_INDEX register */ - - data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | - SE_BROADCAST_WRITES; - - write_register(kgd, GRBM_GFX_INDEX, data); - - mutex_unlock(&rdev->grbm_idx_mutex); - - return 0; -} - -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset] - / 4; -} - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) -{ - uint32_t reg; - struct radeon_device *rdev = (struct radeon_device *) kgd; - - reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); - return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct radeon_device *rdev = (struct radeon_device *) kgd; - - reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); - return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct radeon_device *rdev = (struct radeon_device *) kgd; - - return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); -} - -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct radeon_device *rdev = (struct radeon_device *) kgd; - const union radeon_firmware_header *hdr; - - BUG_ON(kgd == NULL || rdev->mec_fw == NULL); - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union radeon_firmware_header *) rdev->me_fw->data; 
- break; - - case KGD_ENGINE_CE: - hdr = (const union radeon_firmware_header *) rdev->ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union radeon_firmware_header *) rdev->mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union radeon_firmware_header *) - rdev->mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - case KGD_ENGINE_SDMA2: - hdr = (const union radeon_firmware_header *) - rdev->sdma_fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index dfee8f7d94ae..cde037f213d7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -34,8 +34,6 @@ #include <linux/slab.h> #include <linux/pm_runtime.h> -#include "radeon_kfd.h" - #if defined(CONFIG_VGA_SWITCHEROO) bool radeon_has_atpx(void); #else @@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) pm_runtime_forbid(dev->dev); } - radeon_kfd_device_fini(rdev); - radeon_acpi_fini(rdev); radeon_modeset_fini(rdev); @@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - radeon_kfd_device_probe(rdev); - radeon_kfd_device_init(rdev); - if (radeon_is_px(dev)) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c index 49750d07ab7d..611cf934b211 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index 815eaa8c394b..bc26efd1793e 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_RADEON_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _RADEON_TRACE_H_ diff --git a/drivers/gpu/drm/radeon/radeon_trace_points.c b/drivers/gpu/drm/radeon/radeon_trace_points.c index e51d3575976b..66b3d5084662 100644 --- a/drivers/gpu/drm/radeon/radeon_trace_points.c +++ b/drivers/gpu/drm/radeon/radeon_trace_points.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright Red Hat Inc 2010. 
* Author : Dave Airlie <airlied@redhat.com> */ diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 8032da57e409..6ada64db00e9 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -597,7 +597,7 @@ release_sg: kfree(ttm->sg); release_pages: - release_pages(ttm->pages, pinned, 0); + release_pages(ttm->pages, pinned); return r; } diff --git a/drivers/gpu/drm/rcar-du/Makefile b/drivers/gpu/drm/rcar-du/Makefile index 2131e722de3b..0cf5c11030e8 100644 --- a/drivers/gpu/drm/rcar-du/Makefile +++ b/drivers/gpu/drm/rcar-du/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 rcar-du-drm-y := rcar_du_crtc.o \ rcar_du_drv.o \ rcar_du_encoder.o \ diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index 3c70c6224bd2..0ccc76217ee4 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -60,7 +60,7 @@ config ROCKCHIP_INNO_HDMI config ROCKCHIP_LVDS bool "Rockchip LVDS support" depends on DRM_ROCKCHIP - depends on PINCTRL + depends on PINCTRL && OF help Choose this option to enable support for Rockchip LVDS controllers. Rockchip rk3288 SoC has LVDS TX Controller can be used, and it diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile index a881d2cc4f25..a314e2109e76 100644 --- a/drivers/gpu/drm/rockchip/Makefile +++ b/drivers/gpu/drm/rockchip/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 4d3f6ad0abdd..93b7102dd008 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -72,7 +72,7 @@ struct rockchip_dp_device { struct reset_control *rst; struct work_struct psr_work; - spinlock_t psr_lock; + struct mutex psr_lock; unsigned int psr_state; const struct rockchip_dp_chip_data *data; @@ -83,21 +83,20 @@ struct rockchip_dp_device { static void analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) { struct rockchip_dp_device *dp = to_dp(encoder); - unsigned long flags; if (!analogix_dp_psr_supported(dp->dev)) return; DRM_DEV_DEBUG(dp->dev, "%s PSR...\n", enabled ? 
"Entry" : "Exit"); - spin_lock_irqsave(&dp->psr_lock, flags); + mutex_lock(&dp->psr_lock); if (enabled) dp->psr_state = EDP_VSC_PSR_STATE_ACTIVE; else dp->psr_state = ~EDP_VSC_PSR_STATE_ACTIVE; schedule_work(&dp->psr_work); - spin_unlock_irqrestore(&dp->psr_lock, flags); + mutex_unlock(&dp->psr_lock); } static void analogix_dp_psr_work(struct work_struct *work) @@ -105,7 +104,6 @@ static void analogix_dp_psr_work(struct work_struct *work) struct rockchip_dp_device *dp = container_of(work, typeof(*dp), psr_work); int ret; - unsigned long flags; ret = rockchip_drm_wait_vact_end(dp->encoder.crtc, PSR_WAIT_LINE_FLAG_TIMEOUT_MS); @@ -114,12 +112,12 @@ static void analogix_dp_psr_work(struct work_struct *work) return; } - spin_lock_irqsave(&dp->psr_lock, flags); + mutex_lock(&dp->psr_lock); if (dp->psr_state == EDP_VSC_PSR_STATE_ACTIVE) analogix_dp_enable_psr(dp->dev); else analogix_dp_disable_psr(dp->dev); - spin_unlock_irqrestore(&dp->psr_lock, flags); + mutex_unlock(&dp->psr_lock); } static int rockchip_dp_pre_init(struct rockchip_dp_device *dp) @@ -381,7 +379,7 @@ static int rockchip_dp_bind(struct device *dev, struct device *master, dp->plat_data.power_off = rockchip_dp_powerdown; dp->plat_data.get_modes = rockchip_dp_get_modes; - spin_lock_init(&dp->psr_lock); + mutex_init(&dp->psr_lock); dp->psr_state = ~EDP_VSC_PSR_STATE_ACTIVE; INIT_WORK(&dp->psr_work, analogix_dp_psr_work); diff --git a/drivers/gpu/drm/selftests/drm_mm_selftests.h b/drivers/gpu/drm/selftests/drm_mm_selftests.h index 37bbdac52896..54acc117550c 100644 --- a/drivers/gpu/drm/selftests/drm_mm_selftests.h +++ b/drivers/gpu/drm/selftests/drm_mm_selftests.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* List each unit test as selftest(name, function) * * The name is used as both an enum and expanded as igt__name to create diff --git a/drivers/gpu/drm/shmobile/Makefile b/drivers/gpu/drm/shmobile/Makefile index 4c3eeb355630..861edafed856 100644 --- a/drivers/gpu/drm/shmobile/Makefile +++ b/drivers/gpu/drm/shmobile/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 shmob-drm-y := shmob_drm_backlight.o \ shmob_drm_crtc.o \ shmob_drm_drv.o \ diff --git a/drivers/gpu/drm/sti/Makefile b/drivers/gpu/drm/sti/Makefile index c35db12435c3..f203ac5514ae 100644 --- a/drivers/gpu/drm/sti/Makefile +++ b/drivers/gpu/drm/sti/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 sti-drm-y := \ sti_mixer.o \ sti_gdp.o \ diff --git a/drivers/gpu/drm/sun4i/Makefile b/drivers/gpu/drm/sun4i/Makefile index 43c753cafc88..0c2f8c7facae 100644 --- a/drivers/gpu/drm/sun4i/Makefile +++ b/drivers/gpu/drm/sun4i/Makefile @@ -1,24 +1,26 @@ -sun4i-drm-y += sun4i_drv.o -sun4i-drm-y += sun4i_framebuffer.o +# SPDX-License-Identifier: GPL-2.0 +sun4i-backend-y += sun4i_backend.o sun4i_layer.o -sun4i-drm-hdmi-y += sun4i_hdmi_enc.o -sun4i-drm-hdmi-y += sun4i_hdmi_i2c.o -sun4i-drm-hdmi-y += sun4i_hdmi_ddc_clk.o -sun4i-drm-hdmi-y += sun4i_hdmi_tmds_clk.o +sun4i-drm-y += sun4i_drv.o +sun4i-drm-y += sun4i_framebuffer.o -sun4i-tcon-y += sun4i_tcon.o -sun4i-tcon-y += sun4i_rgb.o -sun4i-tcon-y += sun4i_dotclock.o -sun4i-tcon-y += sun4i_crtc.o +sun4i-drm-hdmi-y += sun4i_hdmi_ddc_clk.o +sun4i-drm-hdmi-y += sun4i_hdmi_enc.o +sun4i-drm-hdmi-y += sun4i_hdmi_i2c.o +sun4i-drm-hdmi-y += sun4i_hdmi_tmds_clk.o -sun4i-backend-y += sun4i_backend.o sun4i_layer.o +sun8i-mixer-y += sun8i_mixer.o sun8i_layer.o -sun8i-mixer-y += sun8i_mixer.o sun8i_layer.o +sun4i-tcon-y += sun4i_crtc.o +sun4i-tcon-y += sun4i_dotclock.o +sun4i-tcon-y += sun4i_tcon.o +sun4i-tcon-y 
+= sun4i_rgb.o -obj-$(CONFIG_DRM_SUN4I) += sun4i-drm.o sun4i-tcon.o -obj-$(CONFIG_DRM_SUN4I) += sun6i_drc.o +obj-$(CONFIG_DRM_SUN4I) += sun4i-drm.o +obj-$(CONFIG_DRM_SUN4I) += sun4i-tcon.o obj-$(CONFIG_DRM_SUN4I) += sun4i_tv.o +obj-$(CONFIG_DRM_SUN4I) += sun6i_drc.o -obj-$(CONFIG_DRM_SUN4I_BACKEND) += sun4i-backend.o +obj-$(CONFIG_DRM_SUN4I_BACKEND) += sun4i-backend.o obj-$(CONFIG_DRM_SUN4I_HDMI) += sun4i-drm-hdmi.o -obj-$(CONFIG_DRM_SUN8I_MIXER) += sun8i-mixer.o +obj-$(CONFIG_DRM_SUN8I_MIXER) += sun8i-mixer.o diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 4fefd8add714..847eecbe4d14 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -20,6 +20,7 @@ #include <linux/component.h> #include <linux/list.h> +#include <linux/of_device.h> #include <linux/of_graph.h> #include <linux/reset.h> @@ -28,6 +29,11 @@ #include "sun4i_layer.h" #include "sunxi_engine.h" +struct sun4i_backend_quirks { + /* backend <-> TCON muxing selection done in backend */ + bool needs_output_muxing; +}; + static const u32 sunxi_rgb2yuv_coef[12] = { 0x00000107, 0x00000204, 0x00000064, 0x00000108, 0x00003f69, 0x00003ed6, 0x000001c1, 0x00000808, @@ -216,6 +222,13 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend, paddr = drm_fb_cma_get_gem_addr(fb, state, 0); DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr); + /* + * backend DMA accesses DRAM directly, bypassing the system + * bus. As such, the address range is different and the buffer + * address needs to be corrected. + */ + paddr -= PHYS_OFFSET; + /* Write the 32 lower bits of the address (in bits) */ lo_paddr = paddr << 3; DRM_DEBUG_DRIVER("Setting address lower bits to 0x%x\n", lo_paddr); @@ -338,6 +351,7 @@ static int sun4i_backend_bind(struct device *dev, struct device *master, struct drm_device *drm = data; struct sun4i_drv *drv = drm->dev_private; struct sun4i_backend *backend; + const struct sun4i_backend_quirks *quirks; struct resource *res; void __iomem *regs; int i, ret; @@ -432,6 +446,27 @@ static int sun4i_backend_bind(struct device *dev, struct device *master, SUN4I_BACKEND_MODCTL_DEBE_EN | SUN4I_BACKEND_MODCTL_START_CTL); + /* Set output selection if needed */ + quirks = of_device_get_match_data(dev); + if (quirks->needs_output_muxing) { + /* + * We assume there is no dynamic muxing of backends + * and TCONs, so we select the backend with same ID. + * + * While dynamic selection might be interesting, since + * the CRTC is tied to the TCON, while the layers are + * tied to the backends, this means, we will need to + * switch between groups of layers. There might not be + * a way to represent this constraint in DRM. + */ + regmap_update_bits(backend->engine.regs, + SUN4I_BACKEND_MODCTL_REG, + SUN4I_BACKEND_MODCTL_OUT_SEL, + (backend->engine.id + ? 
SUN4I_BACKEND_MODCTL_OUT_LCD1 + : SUN4I_BACKEND_MODCTL_OUT_LCD0)); + } + return 0; err_disable_ram_clk: @@ -479,10 +514,44 @@ static int sun4i_backend_remove(struct platform_device *pdev) return 0; } +static const struct sun4i_backend_quirks sun4i_backend_quirks = { + .needs_output_muxing = true, +}; + +static const struct sun4i_backend_quirks sun5i_backend_quirks = { +}; + +static const struct sun4i_backend_quirks sun6i_backend_quirks = { +}; + +static const struct sun4i_backend_quirks sun7i_backend_quirks = { + .needs_output_muxing = true, +}; + +static const struct sun4i_backend_quirks sun8i_a33_backend_quirks = { +}; + static const struct of_device_id sun4i_backend_of_table[] = { - { .compatible = "allwinner,sun5i-a13-display-backend" }, - { .compatible = "allwinner,sun6i-a31-display-backend" }, - { .compatible = "allwinner,sun8i-a33-display-backend" }, + { + .compatible = "allwinner,sun4i-a10-display-backend", + .data = &sun4i_backend_quirks, + }, + { + .compatible = "allwinner,sun5i-a13-display-backend", + .data = &sun5i_backend_quirks, + }, + { + .compatible = "allwinner,sun6i-a31-display-backend", + .data = &sun6i_backend_quirks, + }, + { + .compatible = "allwinner,sun7i-a20-display-backend", + .data = &sun7i_backend_quirks, + }, + { + .compatible = "allwinner,sun8i-a33-display-backend", + .data = &sun8i_a33_backend_quirks, + }, { } }; MODULE_DEVICE_TABLE(of, sun4i_backend_of_table); diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.h b/drivers/gpu/drm/sun4i/sun4i_backend.h index 21945af67a9d..ac3cc029f5cd 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.h +++ b/drivers/gpu/drm/sun4i/sun4i_backend.h @@ -25,7 +25,8 @@ #define SUN4I_BACKEND_MODCTL_LINE_SEL BIT(29) #define SUN4I_BACKEND_MODCTL_ITLMOD_EN BIT(28) #define SUN4I_BACKEND_MODCTL_OUT_SEL GENMASK(22, 20) -#define SUN4I_BACKEND_MODCTL_OUT_LCD (0 << 20) +#define SUN4I_BACKEND_MODCTL_OUT_LCD0 (0 << 20) +#define SUN4I_BACKEND_MODCTL_OUT_LCD1 (1 << 20) #define SUN4I_BACKEND_MODCTL_OUT_FE0 (6 << 20) #define SUN4I_BACKEND_MODCTL_OUT_FE1 (7 << 20) #define SUN4I_BACKEND_MODCTL_HWC_EN BIT(16) diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.c b/drivers/gpu/drm/sun4i/sun4i_crtc.c index d097c6f93ad0..5decae0069d0 100644 --- a/drivers/gpu/drm/sun4i/sun4i_crtc.c +++ b/drivers/gpu/drm/sun4i/sun4i_crtc.c @@ -30,6 +30,22 @@ #include "sunxi_engine.h" #include "sun4i_tcon.h" +/* + * While this isn't really working in the DRM theory, in practice we + * can only ever have one encoder per TCON since we have a mux in our + * TCON. 
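The sun4i backend changes above key per-SoC behaviour (such as needs_output_muxing) off the DT compatible string: each of_device_id entry carries a .data pointer to a quirks struct, which the driver retrieves with of_device_get_match_data(). A minimal sketch of that pattern with hypothetical names follows; "vendor,foo-*" and struct foo_quirks are invented, not the sun4i code.

#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

struct foo_quirks {
        bool needs_output_muxing;
};

static const struct foo_quirks foo_v1_quirks = {
        .needs_output_muxing = true,
};

static const struct foo_quirks foo_v2_quirks = {
        /* no quirks */
};

static const struct of_device_id foo_of_match[] = {
        { .compatible = "vendor,foo-v1", .data = &foo_v1_quirks },
        { .compatible = "vendor,foo-v2", .data = &foo_v2_quirks },
        { /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, foo_of_match);

static int foo_probe(struct platform_device *pdev)
{
        const struct foo_quirks *quirks;

        /* Returns the .data of the of_device_id entry that matched. */
        quirks = of_device_get_match_data(&pdev->dev);
        if (!quirks)
                return -EINVAL;

        if (quirks->needs_output_muxing) {
                /* select the output here, e.g. with regmap_update_bits() */
        }

        return 0;
}

The backend's bind() callback above does the same lookup, just from the component framework's bind path rather than from probe().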
+ */ +static struct drm_encoder *sun4i_crtc_get_encoder(struct drm_crtc *crtc) +{ + struct drm_encoder *encoder; + + drm_for_each_encoder(encoder, crtc->dev) + if (encoder->crtc == crtc) + return encoder; + + return NULL; +} + static void sun4i_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { @@ -72,11 +88,12 @@ static void sun4i_crtc_atomic_flush(struct drm_crtc *crtc, static void sun4i_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct drm_encoder *encoder = sun4i_crtc_get_encoder(crtc); struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc); DRM_DEBUG_DRIVER("Disabling the CRTC\n"); - sun4i_tcon_disable(scrtc->tcon); + sun4i_tcon_set_status(scrtc->tcon, encoder, false); if (crtc->state->event && !crtc->state->active) { spin_lock_irq(&crtc->dev->event_lock); @@ -90,11 +107,21 @@ static void sun4i_crtc_atomic_disable(struct drm_crtc *crtc, static void sun4i_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct drm_encoder *encoder = sun4i_crtc_get_encoder(crtc); struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc); DRM_DEBUG_DRIVER("Enabling the CRTC\n"); - sun4i_tcon_enable(scrtc->tcon); + sun4i_tcon_set_status(scrtc->tcon, encoder, true); +} + +static void sun4i_crtc_mode_set_nofb(struct drm_crtc *crtc) +{ + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + struct drm_encoder *encoder = sun4i_crtc_get_encoder(crtc); + struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc); + + sun4i_tcon_mode_set(scrtc->tcon, encoder, mode); } static const struct drm_crtc_helper_funcs sun4i_crtc_helper_funcs = { @@ -102,6 +129,7 @@ static const struct drm_crtc_helper_funcs sun4i_crtc_helper_funcs = { .atomic_flush = sun4i_crtc_atomic_flush, .atomic_enable = sun4i_crtc_atomic_enable, .atomic_disable = sun4i_crtc_atomic_disable, + .mode_set_nofb = sun4i_crtc_mode_set_nofb, }; static int sun4i_crtc_enable_vblank(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index b5879d4620d8..75c76cdd82bc 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -11,6 +11,7 @@ */ #include <linux/component.h> +#include <linux/kfifo.h> #include <linux/of_graph.h> #include <linux/of_reserved_mem.h> @@ -177,16 +178,20 @@ static bool sun4i_drv_node_is_connector(struct device_node *node) static bool sun4i_drv_node_is_frontend(struct device_node *node) { - return of_device_is_compatible(node, "allwinner,sun5i-a13-display-frontend") || + return of_device_is_compatible(node, "allwinner,sun4i-a10-display-frontend") || + of_device_is_compatible(node, "allwinner,sun5i-a13-display-frontend") || of_device_is_compatible(node, "allwinner,sun6i-a31-display-frontend") || + of_device_is_compatible(node, "allwinner,sun7i-a20-display-frontend") || of_device_is_compatible(node, "allwinner,sun8i-a33-display-frontend"); } static bool sun4i_drv_node_is_tcon(struct device_node *node) { - return of_device_is_compatible(node, "allwinner,sun5i-a13-tcon") || + return of_device_is_compatible(node, "allwinner,sun4i-a10-tcon") || + of_device_is_compatible(node, "allwinner,sun5i-a13-tcon") || of_device_is_compatible(node, "allwinner,sun6i-a31-tcon") || of_device_is_compatible(node, "allwinner,sun6i-a31s-tcon") || + of_device_is_compatible(node, "allwinner,sun7i-a20-tcon") || of_device_is_compatible(node, "allwinner,sun8i-a33-tcon") || of_device_is_compatible(node, "allwinner,sun8i-v3s-tcon"); } @@ -222,29 +227,15 @@ static int compare_of(struct device 
*dev, void *data) * matching system handles this for us. */ struct endpoint_list { - struct device_node *node; - struct list_head list; + DECLARE_KFIFO(fifo, struct device_node *, 16); }; -static bool node_is_in_list(struct list_head *endpoints, - struct device_node *node) -{ - struct endpoint_list *endpoint; - - list_for_each_entry(endpoint, endpoints, list) - if (endpoint->node == node) - return true; - - return false; -} - static int sun4i_drv_add_endpoints(struct device *dev, - struct list_head *endpoints, + struct endpoint_list *list, struct component_match **match, struct device_node *node) { struct device_node *port, *ep, *remote; - struct endpoint_list *endpoint; int count = 0; /* @@ -304,19 +295,7 @@ static int sun4i_drv_add_endpoints(struct device *dev, } } - /* skip downstream node if it is already in the queue */ - if (node_is_in_list(endpoints, remote)) - continue; - - /* Add downstream nodes to the queue */ - endpoint = kzalloc(sizeof(*endpoint), GFP_KERNEL); - if (!endpoint) { - of_node_put(remote); - return -ENOMEM; - } - - endpoint->node = remote; - list_add_tail(&endpoint->list, endpoints); + kfifo_put(&list->fifo, remote); } return count; @@ -325,10 +304,11 @@ static int sun4i_drv_add_endpoints(struct device *dev, static int sun4i_drv_probe(struct platform_device *pdev) { struct component_match *match = NULL; - struct device_node *np = pdev->dev.of_node; - struct endpoint_list *endpoint, *endpoint_temp; + struct device_node *np = pdev->dev.of_node, *endpoint; + struct endpoint_list list; int i, ret, count = 0; - LIST_HEAD(endpoints); + + INIT_KFIFO(list.fifo); for (i = 0;; i++) { struct device_node *pipeline = of_parse_phandle(np, @@ -337,31 +317,19 @@ static int sun4i_drv_probe(struct platform_device *pdev) if (!pipeline) break; - endpoint = kzalloc(sizeof(*endpoint), GFP_KERNEL); - if (!endpoint) { - ret = -ENOMEM; - goto err_free_endpoints; - } - - endpoint->node = pipeline; - list_add_tail(&endpoint->list, &endpoints); + kfifo_put(&list.fifo, pipeline); } - list_for_each_entry_safe(endpoint, endpoint_temp, &endpoints, list) { + while (kfifo_get(&list.fifo, &endpoint)) { /* process this endpoint */ - ret = sun4i_drv_add_endpoints(&pdev->dev, &endpoints, &match, - endpoint->node); + ret = sun4i_drv_add_endpoints(&pdev->dev, &list, &match, + endpoint); /* sun4i_drv_add_endpoints can fail to allocate memory */ if (ret < 0) - goto err_free_endpoints; + return ret; count += ret; - - /* delete and cleanup the current entry */ - list_del(&endpoint->list); - of_node_put(endpoint->node); - kfree(endpoint); } if (count) @@ -370,15 +338,6 @@ static int sun4i_drv_probe(struct platform_device *pdev) match); else return 0; - -err_free_endpoints: - list_for_each_entry_safe(endpoint, endpoint_temp, &endpoints, list) { - list_del(&endpoint->list); - of_node_put(endpoint->node); - kfree(endpoint); - } - - return ret; } static int sun4i_drv_remove(struct platform_device *pdev) @@ -387,10 +346,12 @@ static int sun4i_drv_remove(struct platform_device *pdev) } static const struct of_device_id sun4i_drv_of_table[] = { + { .compatible = "allwinner,sun4i-a10-display-engine" }, { .compatible = "allwinner,sun5i-a10s-display-engine" }, { .compatible = "allwinner,sun5i-a13-display-engine" }, { .compatible = "allwinner,sun6i-a31-display-engine" }, { .compatible = "allwinner,sun6i-a31s-display-engine" }, + { .compatible = "allwinner,sun7i-a20-display-engine" }, { .compatible = "allwinner,sun8i-a33-display-engine" }, { .compatible = "allwinner,sun8i-v3s-display-engine" }, { } diff --git 
a/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c index 04f85b1cf922..e826da34e919 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c @@ -13,7 +13,6 @@ #include <linux/clk-provider.h> #include <linux/regmap.h> -#include "sun4i_tcon.h" #include "sun4i_hdmi.h" struct sun4i_ddc { diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 6ca6e6a74c4a..dda904ec0534 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -30,7 +30,6 @@ #include "sun4i_crtc.h" #include "sun4i_drv.h" #include "sun4i_hdmi.h" -#include "sun4i_tcon.h" static inline struct sun4i_hdmi * drm_encoder_to_sun4i_hdmi(struct drm_encoder *encoder) @@ -86,8 +85,6 @@ static int sun4i_hdmi_atomic_check(struct drm_encoder *encoder, static void sun4i_hdmi_disable(struct drm_encoder *encoder) { struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder); - struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc); - struct sun4i_tcon *tcon = crtc->tcon; u32 val; DRM_DEBUG_DRIVER("Disabling the HDMI Output\n"); @@ -95,22 +92,16 @@ static void sun4i_hdmi_disable(struct drm_encoder *encoder) val = readl(hdmi->base + SUN4I_HDMI_VID_CTRL_REG); val &= ~SUN4I_HDMI_VID_CTRL_ENABLE; writel(val, hdmi->base + SUN4I_HDMI_VID_CTRL_REG); - - sun4i_tcon_channel_disable(tcon, 1); } static void sun4i_hdmi_enable(struct drm_encoder *encoder) { struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder); - struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc); - struct sun4i_tcon *tcon = crtc->tcon; u32 val = 0; DRM_DEBUG_DRIVER("Enabling the HDMI Output\n"); - sun4i_tcon_channel_enable(tcon, 1); - sun4i_hdmi_setup_avi_infoframes(hdmi, mode); val |= SUN4I_HDMI_PKT_CTRL_TYPE(0, SUN4I_HDMI_PKT_AVI); val |= SUN4I_HDMI_PKT_CTRL_TYPE(1, SUN4I_HDMI_PKT_END); @@ -128,15 +119,9 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode) { struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder); - struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc); - struct sun4i_tcon *tcon = crtc->tcon; unsigned int x, y; u32 val; - sun4i_tcon1_mode_set(tcon, mode); - sun4i_tcon_set_mux(tcon, 1, encoder); - - clk_set_rate(tcon->sclk1, mode->crtc_clock * 1000); clk_set_rate(hdmi->mod_clk, mode->crtc_clock * 1000); clk_set_rate(hdmi->tmds_clk, mode->crtc_clock * 1000); @@ -289,6 +274,58 @@ static const struct cec_pin_ops sun4i_hdmi_cec_pin_ops = { #define SUN4I_HDMI_PAD_CTRL1_MASK (GENMASK(24, 7) | GENMASK(5, 0)) #define SUN4I_HDMI_PLL_CTRL_MASK (GENMASK(31, 8) | GENMASK(3, 0)) +/* Only difference from sun5i is AMP is 4 instead of 6 */ +static const struct sun4i_hdmi_variant sun4i_variant = { + .pad_ctrl0_init_val = SUN4I_HDMI_PAD_CTRL0_TXEN | + SUN4I_HDMI_PAD_CTRL0_CKEN | + SUN4I_HDMI_PAD_CTRL0_PWENG | + SUN4I_HDMI_PAD_CTRL0_PWEND | + SUN4I_HDMI_PAD_CTRL0_PWENC | + SUN4I_HDMI_PAD_CTRL0_LDODEN | + SUN4I_HDMI_PAD_CTRL0_LDOCEN | + SUN4I_HDMI_PAD_CTRL0_BIASEN, + .pad_ctrl1_init_val = SUN4I_HDMI_PAD_CTRL1_REG_AMP(4) | + SUN4I_HDMI_PAD_CTRL1_REG_EMP(2) | + SUN4I_HDMI_PAD_CTRL1_REG_DENCK | + SUN4I_HDMI_PAD_CTRL1_REG_DEN | + SUN4I_HDMI_PAD_CTRL1_EMPCK_OPT | + SUN4I_HDMI_PAD_CTRL1_EMP_OPT | + SUN4I_HDMI_PAD_CTRL1_AMPCK_OPT | + SUN4I_HDMI_PAD_CTRL1_AMP_OPT, + .pll_ctrl_init_val = SUN4I_HDMI_PLL_CTRL_VCO_S(8) | + SUN4I_HDMI_PLL_CTRL_CS(7) | + 
SUN4I_HDMI_PLL_CTRL_CP_S(15) | + SUN4I_HDMI_PLL_CTRL_S(7) | + SUN4I_HDMI_PLL_CTRL_VCO_GAIN(4) | + SUN4I_HDMI_PLL_CTRL_SDIV2 | + SUN4I_HDMI_PLL_CTRL_LDO2_EN | + SUN4I_HDMI_PLL_CTRL_LDO1_EN | + SUN4I_HDMI_PLL_CTRL_HV_IS_33 | + SUN4I_HDMI_PLL_CTRL_BWS | + SUN4I_HDMI_PLL_CTRL_PLL_EN, + + .ddc_clk_reg = REG_FIELD(SUN4I_HDMI_DDC_CLK_REG, 0, 6), + .ddc_clk_pre_divider = 2, + .ddc_clk_m_offset = 1, + + .field_ddc_en = REG_FIELD(SUN4I_HDMI_DDC_CTRL_REG, 31, 31), + .field_ddc_start = REG_FIELD(SUN4I_HDMI_DDC_CTRL_REG, 30, 30), + .field_ddc_reset = REG_FIELD(SUN4I_HDMI_DDC_CTRL_REG, 0, 0), + .field_ddc_addr_reg = REG_FIELD(SUN4I_HDMI_DDC_ADDR_REG, 0, 31), + .field_ddc_slave_addr = REG_FIELD(SUN4I_HDMI_DDC_ADDR_REG, 0, 6), + .field_ddc_int_status = REG_FIELD(SUN4I_HDMI_DDC_INT_STATUS_REG, 0, 8), + .field_ddc_fifo_clear = REG_FIELD(SUN4I_HDMI_DDC_FIFO_CTRL_REG, 31, 31), + .field_ddc_fifo_rx_thres = REG_FIELD(SUN4I_HDMI_DDC_FIFO_CTRL_REG, 4, 7), + .field_ddc_fifo_tx_thres = REG_FIELD(SUN4I_HDMI_DDC_FIFO_CTRL_REG, 0, 3), + .field_ddc_byte_count = REG_FIELD(SUN4I_HDMI_DDC_BYTE_COUNT_REG, 0, 9), + .field_ddc_cmd = REG_FIELD(SUN4I_HDMI_DDC_CMD_REG, 0, 2), + .field_ddc_sda_en = REG_FIELD(SUN4I_HDMI_DDC_LINE_CTRL_REG, 9, 9), + .field_ddc_sck_en = REG_FIELD(SUN4I_HDMI_DDC_LINE_CTRL_REG, 8, 8), + + .ddc_fifo_reg = SUN4I_HDMI_DDC_FIFO_DATA_REG, + .ddc_fifo_has_dir = true, +}; + static const struct sun4i_hdmi_variant sun5i_variant = { .pad_ctrl0_init_val = SUN4I_HDMI_PAD_CTRL0_TXEN | SUN4I_HDMI_PAD_CTRL0_CKEN | @@ -613,6 +650,7 @@ static int sun4i_hdmi_remove(struct platform_device *pdev) } static const struct of_device_id sun4i_hdmi_of_table[] = { + { .compatible = "allwinner,sun4i-a10-hdmi", .data = &sun4i_variant, }, { .compatible = "allwinner,sun5i-a10s-hdmi", .data = &sun5i_variant, }, { .compatible = "allwinner,sun6i-a31-hdmi", .data = &sun6i_variant, }, { } diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c index 1b6b37aefc38..dc332ea56f6c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c @@ -12,7 +12,6 @@ #include <linux/clk-provider.h> -#include "sun4i_tcon.h" #include "sun4i_hdmi.h" struct sun4i_tmds { diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index 7cd7090ad63a..832f8f9bc47f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -134,13 +134,10 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling RGB output\n"); - if (!IS_ERR(tcon->panel)) + if (!IS_ERR(tcon->panel)) { drm_panel_prepare(tcon->panel); - - sun4i_tcon_channel_enable(tcon, 0); - - if (!IS_ERR(tcon->panel)) drm_panel_enable(tcon->panel); + } } static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) @@ -150,31 +147,13 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Disabling RGB output\n"); - if (!IS_ERR(tcon->panel)) + if (!IS_ERR(tcon->panel)) { drm_panel_disable(tcon->panel); - - sun4i_tcon_channel_disable(tcon, 0); - - if (!IS_ERR(tcon->panel)) drm_panel_unprepare(tcon->panel); -} - -static void sun4i_rgb_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct sun4i_rgb *rgb = drm_encoder_to_sun4i_rgb(encoder); - struct sun4i_tcon *tcon = rgb->tcon; - - sun4i_tcon0_mode_set(tcon, mode); - sun4i_tcon_set_mux(tcon, 0, encoder); - - /* FIXME: This seems to be board specific */ - 
clk_set_phase(tcon->dclk, 120); + } } static struct drm_encoder_helper_funcs sun4i_rgb_enc_helper_funcs = { - .mode_set = sun4i_rgb_encoder_mode_set, .disable = sun4i_rgb_encoder_disable, .enable = sun4i_rgb_encoder_enable, }; diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 68751b999877..e122f5b2a395 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -35,66 +35,61 @@ #include "sun4i_tcon.h" #include "sunxi_engine.h" -void sun4i_tcon_disable(struct sun4i_tcon *tcon) +static void sun4i_tcon_channel_set_status(struct sun4i_tcon *tcon, int channel, + bool enabled) { - DRM_DEBUG_DRIVER("Disabling TCON\n"); + struct clk *clk; - /* Disable the TCON */ - regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG, - SUN4I_TCON_GCTL_TCON_ENABLE, 0); -} -EXPORT_SYMBOL(sun4i_tcon_disable); - -void sun4i_tcon_enable(struct sun4i_tcon *tcon) -{ - DRM_DEBUG_DRIVER("Enabling TCON\n"); - - /* Enable the TCON */ - regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG, - SUN4I_TCON_GCTL_TCON_ENABLE, - SUN4I_TCON_GCTL_TCON_ENABLE); -} -EXPORT_SYMBOL(sun4i_tcon_enable); - -void sun4i_tcon_channel_disable(struct sun4i_tcon *tcon, int channel) -{ - DRM_DEBUG_DRIVER("Disabling TCON channel %d\n", channel); - - /* Disable the TCON's channel */ - if (channel == 0) { + switch (channel) { + case 0: regmap_update_bits(tcon->regs, SUN4I_TCON0_CTL_REG, - SUN4I_TCON0_CTL_TCON_ENABLE, 0); - clk_disable_unprepare(tcon->dclk); + SUN4I_TCON0_CTL_TCON_ENABLE, + enabled ? SUN4I_TCON0_CTL_TCON_ENABLE : 0); + clk = tcon->dclk; + break; + case 1: + WARN_ON(!tcon->quirks->has_channel_1); + regmap_update_bits(tcon->regs, SUN4I_TCON1_CTL_REG, + SUN4I_TCON1_CTL_TCON_ENABLE, + enabled ? SUN4I_TCON1_CTL_TCON_ENABLE : 0); + clk = tcon->sclk1; + break; + default: + DRM_WARN("Unknown channel... doing nothing\n"); return; } - WARN_ON(!tcon->quirks->has_channel_1); - regmap_update_bits(tcon->regs, SUN4I_TCON1_CTL_REG, - SUN4I_TCON1_CTL_TCON_ENABLE, 0); - clk_disable_unprepare(tcon->sclk1); + if (enabled) + clk_prepare_enable(clk); + else + clk_disable_unprepare(clk); } -EXPORT_SYMBOL(sun4i_tcon_channel_disable); -void sun4i_tcon_channel_enable(struct sun4i_tcon *tcon, int channel) +void sun4i_tcon_set_status(struct sun4i_tcon *tcon, + const struct drm_encoder *encoder, + bool enabled) { - DRM_DEBUG_DRIVER("Enabling TCON channel %d\n", channel); + int channel; - /* Enable the TCON's channel */ - if (channel == 0) { - regmap_update_bits(tcon->regs, SUN4I_TCON0_CTL_REG, - SUN4I_TCON0_CTL_TCON_ENABLE, - SUN4I_TCON0_CTL_TCON_ENABLE); - clk_prepare_enable(tcon->dclk); + switch (encoder->encoder_type) { + case DRM_MODE_ENCODER_NONE: + channel = 0; + break; + case DRM_MODE_ENCODER_TMDS: + case DRM_MODE_ENCODER_TVDAC: + channel = 1; + break; + default: + DRM_DEBUG_DRIVER("Unknown encoder type, doing nothing...\n"); return; } - WARN_ON(!tcon->quirks->has_channel_1); - regmap_update_bits(tcon->regs, SUN4I_TCON1_CTL_REG, - SUN4I_TCON1_CTL_TCON_ENABLE, - SUN4I_TCON1_CTL_TCON_ENABLE); - clk_prepare_enable(tcon->sclk1); + regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG, + SUN4I_TCON_GCTL_TCON_ENABLE, + enabled ? 
SUN4I_TCON_GCTL_TCON_ENABLE : 0); + + sun4i_tcon_channel_set_status(tcon, channel, enabled); } -EXPORT_SYMBOL(sun4i_tcon_channel_enable); void sun4i_tcon_enable_vblank(struct sun4i_tcon *tcon, bool enable) { @@ -134,7 +129,7 @@ static struct sun4i_tcon *sun4i_get_tcon0(struct drm_device *drm) } void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel, - struct drm_encoder *encoder) + const struct drm_encoder *encoder) { int ret = -ENOTSUPP; @@ -144,9 +139,8 @@ void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel, DRM_DEBUG_DRIVER("Muxing encoder %s to CRTC %s: %d\n", encoder->name, encoder->crtc->name, ret); } -EXPORT_SYMBOL(sun4i_tcon_set_mux); -static int sun4i_tcon_get_clk_delay(struct drm_display_mode *mode, +static int sun4i_tcon_get_clk_delay(const struct drm_display_mode *mode, int channel) { int delay = mode->vtotal - mode->vdisplay; @@ -164,15 +158,26 @@ static int sun4i_tcon_get_clk_delay(struct drm_display_mode *mode, return delay; } -void sun4i_tcon0_mode_set(struct sun4i_tcon *tcon, - struct drm_display_mode *mode) +static void sun4i_tcon0_mode_set_common(struct sun4i_tcon *tcon, + const struct drm_display_mode *mode) +{ + /* Configure the dot clock */ + clk_set_rate(tcon->dclk, mode->crtc_clock * 1000); + + /* Set the resolution */ + regmap_write(tcon->regs, SUN4I_TCON0_BASIC0_REG, + SUN4I_TCON0_BASIC0_X(mode->crtc_hdisplay) | + SUN4I_TCON0_BASIC0_Y(mode->crtc_vdisplay)); +} + +static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, + const struct drm_display_mode *mode) { unsigned int bp, hsync, vsync; u8 clk_delay; u32 val = 0; - /* Configure the dot clock */ - clk_set_rate(tcon->dclk, mode->crtc_clock * 1000); + sun4i_tcon0_mode_set_common(tcon, mode); /* Adjust clock delay */ clk_delay = sun4i_tcon_get_clk_delay(mode, 0); @@ -180,11 +185,6 @@ void sun4i_tcon0_mode_set(struct sun4i_tcon *tcon, SUN4I_TCON0_CTL_CLK_DELAY_MASK, SUN4I_TCON0_CTL_CLK_DELAY(clk_delay)); - /* Set the resolution */ - regmap_write(tcon->regs, SUN4I_TCON0_BASIC0_REG, - SUN4I_TCON0_BASIC0_X(mode->crtc_hdisplay) | - SUN4I_TCON0_BASIC0_Y(mode->crtc_vdisplay)); - /* * This is called a backporch in the register documentation, * but it really is the back porch + hsync @@ -238,10 +238,9 @@ void sun4i_tcon0_mode_set(struct sun4i_tcon *tcon, /* Enable the output on the pins */ regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, 0); } -EXPORT_SYMBOL(sun4i_tcon0_mode_set); -void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, - struct drm_display_mode *mode) +static void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, + const struct drm_display_mode *mode) { unsigned int bp, hsync, vsync, vtotal; u8 clk_delay; @@ -329,7 +328,26 @@ void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, SUN4I_TCON_GCTL_IOMAP_MASK, SUN4I_TCON_GCTL_IOMAP_TCON1); } -EXPORT_SYMBOL(sun4i_tcon1_mode_set); + +void sun4i_tcon_mode_set(struct sun4i_tcon *tcon, + const struct drm_encoder *encoder, + const struct drm_display_mode *mode) +{ + switch (encoder->encoder_type) { + case DRM_MODE_ENCODER_NONE: + sun4i_tcon0_mode_set_rgb(tcon, mode); + sun4i_tcon_set_mux(tcon, 0, encoder); + break; + case DRM_MODE_ENCODER_TVDAC: + case DRM_MODE_ENCODER_TMDS: + sun4i_tcon1_mode_set(tcon, mode); + sun4i_tcon_set_mux(tcon, 1, encoder); + break; + default: + DRM_DEBUG_DRIVER("Unknown encoder type, doing nothing...\n"); + } +} +EXPORT_SYMBOL(sun4i_tcon_mode_set); static void sun4i_tcon_finish_page_flip(struct drm_device *dev, struct sun4i_crtc *scrtc) @@ -782,8 +800,32 @@ static int sun4i_tcon_remove(struct platform_device *pdev) } /* platform 
specific TCON muxing callbacks */ +static int sun4i_a10_tcon_set_mux(struct sun4i_tcon *tcon, + const struct drm_encoder *encoder) +{ + struct sun4i_tcon *tcon0 = sun4i_get_tcon0(encoder->dev); + u32 shift; + + if (!tcon0) + return -EINVAL; + + switch (encoder->encoder_type) { + case DRM_MODE_ENCODER_TMDS: + /* HDMI */ + shift = 8; + break; + default: + return -EINVAL; + } + + regmap_update_bits(tcon0->regs, SUN4I_TCON_MUX_CTRL_REG, + 0x3 << shift, tcon->id << shift); + + return 0; +} + static int sun5i_a13_tcon_set_mux(struct sun4i_tcon *tcon, - struct drm_encoder *encoder) + const struct drm_encoder *encoder) { u32 val; @@ -799,7 +841,7 @@ static int sun5i_a13_tcon_set_mux(struct sun4i_tcon *tcon, } static int sun6i_tcon_set_mux(struct sun4i_tcon *tcon, - struct drm_encoder *encoder) + const struct drm_encoder *encoder) { struct sun4i_tcon *tcon0 = sun4i_get_tcon0(encoder->dev); u32 shift; @@ -823,6 +865,11 @@ static int sun6i_tcon_set_mux(struct sun4i_tcon *tcon, return 0; } +static const struct sun4i_tcon_quirks sun4i_a10_quirks = { + .has_channel_1 = true, + .set_mux = sun4i_a10_tcon_set_mux, +}; + static const struct sun4i_tcon_quirks sun5i_a13_quirks = { .has_channel_1 = true, .set_mux = sun5i_a13_tcon_set_mux, @@ -839,6 +886,12 @@ static const struct sun4i_tcon_quirks sun6i_a31s_quirks = { .needs_de_be_mux = true, }; +static const struct sun4i_tcon_quirks sun7i_a20_quirks = { + .has_channel_1 = true, + /* Same display pipeline structure as A10 */ + .set_mux = sun4i_a10_tcon_set_mux, +}; + static const struct sun4i_tcon_quirks sun8i_a33_quirks = { /* nothing is supported */ }; @@ -848,9 +901,11 @@ static const struct sun4i_tcon_quirks sun8i_v3s_quirks = { }; static const struct of_device_id sun4i_tcon_of_table[] = { + { .compatible = "allwinner,sun4i-a10-tcon", .data = &sun4i_a10_quirks }, { .compatible = "allwinner,sun5i-a13-tcon", .data = &sun5i_a13_quirks }, { .compatible = "allwinner,sun6i-a31-tcon", .data = &sun6i_a31_quirks }, { .compatible = "allwinner,sun6i-a31s-tcon", .data = &sun6i_a31s_quirks }, + { .compatible = "allwinner,sun7i-a20-tcon", .data = &sun7i_a20_quirks }, { .compatible = "allwinner,sun8i-a33-tcon", .data = &sun8i_a33_quirks }, { .compatible = "allwinner,sun8i-v3s-tcon", .data = &sun8i_v3s_quirks }, { } diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h index d9e1357cc8ae..f61bf6d83b4a 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.h +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h @@ -152,7 +152,7 @@ struct sun4i_tcon_quirks { bool needs_de_be_mux; /* sun6i needs mux to select backend */ /* callback to handle tcon muxing options */ - int (*set_mux)(struct sun4i_tcon *, struct drm_encoder *); + int (*set_mux)(struct sun4i_tcon *, const struct drm_encoder *); }; struct sun4i_tcon { @@ -190,22 +190,11 @@ struct sun4i_tcon { struct drm_bridge *sun4i_tcon_find_bridge(struct device_node *node); struct drm_panel *sun4i_tcon_find_panel(struct device_node *node); -/* Global Control */ -void sun4i_tcon_disable(struct sun4i_tcon *tcon); -void sun4i_tcon_enable(struct sun4i_tcon *tcon); - -/* Channel Control */ -void sun4i_tcon_channel_disable(struct sun4i_tcon *tcon, int channel); -void sun4i_tcon_channel_enable(struct sun4i_tcon *tcon, int channel); - void sun4i_tcon_enable_vblank(struct sun4i_tcon *tcon, bool enable); - -/* Mode Related Controls */ -void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel, - struct drm_encoder *encoder); -void sun4i_tcon0_mode_set(struct sun4i_tcon *tcon, - struct drm_display_mode *mode); -void 
sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, - struct drm_display_mode *mode); +void sun4i_tcon_mode_set(struct sun4i_tcon *tcon, + const struct drm_encoder *encoder, + const struct drm_display_mode *mode); +void sun4i_tcon_set_status(struct sun4i_tcon *crtc, + const struct drm_encoder *encoder, bool enable); #endif /* __SUN4I_TCON_H__ */ diff --git a/drivers/gpu/drm/sun4i/sun4i_tv.c b/drivers/gpu/drm/sun4i/sun4i_tv.c index 050cfd43c7a0..b070d522ed8d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tv.c +++ b/drivers/gpu/drm/sun4i/sun4i_tv.c @@ -24,7 +24,6 @@ #include "sun4i_crtc.h" #include "sun4i_drv.h" -#include "sun4i_tcon.h" #include "sunxi_engine.h" #define SUN4I_TVE_EN_REG 0x000 @@ -345,12 +344,9 @@ static void sun4i_tv_disable(struct drm_encoder *encoder) { struct sun4i_tv *tv = drm_encoder_to_sun4i_tv(encoder); struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc); - struct sun4i_tcon *tcon = crtc->tcon; DRM_DEBUG_DRIVER("Disabling the TV Output\n"); - sun4i_tcon_channel_disable(tcon, 1); - regmap_update_bits(tv->regs, SUN4I_TVE_EN_REG, SUN4I_TVE_EN_ENABLE, 0); @@ -362,7 +358,6 @@ static void sun4i_tv_enable(struct drm_encoder *encoder) { struct sun4i_tv *tv = drm_encoder_to_sun4i_tv(encoder); struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc); - struct sun4i_tcon *tcon = crtc->tcon; DRM_DEBUG_DRIVER("Enabling the TV Output\n"); @@ -371,8 +366,6 @@ static void sun4i_tv_enable(struct drm_encoder *encoder) regmap_update_bits(tv->regs, SUN4I_TVE_EN_REG, SUN4I_TVE_EN_ENABLE, SUN4I_TVE_EN_ENABLE); - - sun4i_tcon_channel_enable(tcon, 1); } static void sun4i_tv_mode_set(struct drm_encoder *encoder, @@ -380,13 +373,8 @@ static void sun4i_tv_mode_set(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode) { struct sun4i_tv *tv = drm_encoder_to_sun4i_tv(encoder); - struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc); - struct sun4i_tcon *tcon = crtc->tcon; const struct tv_mode *tv_mode = sun4i_tv_find_tv_by_mode(mode); - sun4i_tcon1_mode_set(tcon, mode); - sun4i_tcon_set_mux(tcon, 1, encoder); - /* Enable and map the DAC to the output */ regmap_update_bits(tv->regs, SUN4I_TVE_EN_REG, SUN4I_TVE_EN_DAC_MAP_MASK, diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index dc58ab140151..cf54847a8bd1 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -9,6 +9,7 @@ config DRM_TEGRA select DRM_PANEL select TEGRA_HOST1X select IOMMU_IOVA if IOMMU_SUPPORT + select CEC_CORE if CEC_NOTIFIER help Choose this option if you have an NVIDIA Tegra SoC. 
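The new sun4i_tcon.h prototypes above replace the per-channel enable/disable and mode_set exports: encoders and the CRTC only hand the TCON a drm_encoder plus the mode, and the channel (0 for RGB/LCD, 1 for TMDS/TV DAC) is derived from encoder->encoder_type inside the TCON driver. A rough sketch of the calling convention the consolidated API implies; the wrapper names are assumptions for illustration, not the actual sun4i CRTC code.

/* Sketch: driving the consolidated TCON entry points from a CRTC path. */
static void sun4i_crtc_example_mode_set(struct sun4i_tcon *tcon,
					struct drm_encoder *encoder,
					const struct drm_display_mode *mode)
{
	/* Dispatches to tcon0 (RGB) or tcon1 (HDMI/TV) and sets the mux. */
	sun4i_tcon_mode_set(tcon, encoder, mode);
}

static void sun4i_crtc_example_enable(struct sun4i_tcon *tcon,
				      struct drm_encoder *encoder)
{
	/* Enables the global TCON bit plus the channel matching the encoder,
	 * and prepares the matching clock (dclk or sclk1). */
	sun4i_tcon_set_status(tcon, encoder, true);
}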
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index 8927784396e8..46d65d39214d 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG tegra-drm-y := \ diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 4df39112e38e..24a5ef4f5bb8 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -10,6 +10,7 @@ #include <linux/clk.h> #include <linux/debugfs.h> #include <linux/iommu.h> +#include <linux/of_device.h> #include <linux/pm_runtime.h> #include <linux/reset.h> @@ -23,16 +24,6 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_plane_helper.h> -struct tegra_dc_soc_info { - bool supports_border_color; - bool supports_interlacing; - bool supports_cursor; - bool supports_block_linear; - unsigned int pitch_align; - bool has_powergate; - bool broken_reset; -}; - struct tegra_plane { struct drm_plane base; unsigned int index; @@ -559,14 +550,21 @@ static int tegra_plane_atomic_check(struct drm_plane *plane, return 0; } -static void tegra_dc_disable_window(struct tegra_dc *dc, int index) +static void tegra_plane_atomic_disable(struct drm_plane *plane, + struct drm_plane_state *old_state) { + struct tegra_dc *dc = to_tegra_dc(old_state->crtc); + struct tegra_plane *p = to_tegra_plane(plane); unsigned long flags; u32 value; + /* rien ne va plus */ + if (!old_state || !old_state->crtc) + return; + spin_lock_irqsave(&dc->lock, flags); - value = WINDOW_A_SELECT << index; + value = WINDOW_A_SELECT << p->index; tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER); value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS); @@ -591,7 +589,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane, return; if (!plane->state->visible) - return tegra_dc_disable_window(dc, p->index); + return tegra_plane_atomic_disable(plane, old_state); memset(&window, 0, sizeof(window)); window.src.x = plane->state->src.x1 >> 16; @@ -627,25 +625,10 @@ static void tegra_plane_atomic_update(struct drm_plane *plane, tegra_dc_setup_window(dc, p->index, &window); } -static void tegra_plane_atomic_disable(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ - struct tegra_plane *p = to_tegra_plane(plane); - struct tegra_dc *dc; - - /* rien ne va plus */ - if (!old_state || !old_state->crtc) - return; - - dc = to_tegra_dc(old_state->crtc); - - tegra_dc_disable_window(dc, p->index); -} - -static const struct drm_plane_helper_funcs tegra_primary_plane_helper_funcs = { +static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = { .atomic_check = tegra_plane_atomic_check, - .atomic_update = tegra_plane_atomic_update, .atomic_disable = tegra_plane_atomic_disable, + .atomic_update = tegra_plane_atomic_update, }; static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm, @@ -685,7 +668,7 @@ static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm, return ERR_PTR(err); } - drm_plane_helper_add(&plane->base, &tegra_primary_plane_helper_funcs); + drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); return &plane->base; } @@ -880,12 +863,6 @@ static const uint32_t tegra_overlay_plane_formats[] = { DRM_FORMAT_YUV422, }; -static const struct drm_plane_helper_funcs tegra_overlay_plane_helper_funcs = { - .atomic_check = tegra_plane_atomic_check, - .atomic_update = tegra_plane_atomic_update, - .atomic_disable = tegra_plane_atomic_disable, -}; - static struct drm_plane 
*tegra_dc_overlay_plane_create(struct drm_device *drm, struct tegra_dc *dc, unsigned int index) @@ -913,7 +890,7 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, return ERR_PTR(err); } - drm_plane_helper_add(&plane->base, &tegra_overlay_plane_helper_funcs); + drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); return &plane->base; } @@ -1161,6 +1138,11 @@ static void tegra_dc_commit_state(struct tegra_dc *dc, value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1; tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL); + + err = clk_set_rate(dc->clk, state->pclk); + if (err < 0) + dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n", + dc->clk, state->pclk, err); } static void tegra_dc_stop(struct tegra_dc *dc) @@ -1756,7 +1738,7 @@ static int tegra_dc_init(struct host1x_client *client) struct drm_plane *cursor = NULL; int err; - dc->syncpt = host1x_syncpt_request(dc->dev, flags); + dc->syncpt = host1x_syncpt_request(client, flags); if (!dc->syncpt) dev_warn(dc->dev, "failed to allocate syncpoint\n"); @@ -1985,7 +1967,6 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc) static int tegra_dc_probe(struct platform_device *pdev) { - const struct of_device_id *id; struct resource *regs; struct tegra_dc *dc; int err; @@ -1994,14 +1975,11 @@ static int tegra_dc_probe(struct platform_device *pdev) if (!dc) return -ENOMEM; - id = of_match_node(tegra_dc_of_match, pdev->dev.of_node); - if (!id) - return -ENODEV; + dc->soc = of_device_get_match_data(&pdev->dev); spin_lock_init(&dc->lock); INIT_LIST_HEAD(&dc->list); dc->dev = &pdev->dev; - dc->soc = id->data; err = tegra_dc_parse_dt(dc); if (err < 0) @@ -2019,8 +1997,22 @@ static int tegra_dc_probe(struct platform_device *pdev) return PTR_ERR(dc->rst); } - if (!dc->soc->broken_reset) - reset_control_assert(dc->rst); + /* assert reset and disable clock */ + if (!dc->soc->broken_reset) { + err = clk_prepare_enable(dc->clk); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + err = reset_control_assert(dc->rst); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + clk_disable_unprepare(dc->clk); + } if (dc->soc->has_powergate) { if (dc->pipe == 0) diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 4a268635749b..cb100b6e3282 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -10,6 +10,126 @@ #ifndef TEGRA_DC_H #define TEGRA_DC_H 1 +#include <linux/host1x.h> + +#include <drm/drm_crtc.h> + +#include "drm.h" + +struct tegra_output; + +struct tegra_dc_stats { + unsigned long frames; + unsigned long vblank; + unsigned long underflow; + unsigned long overflow; +}; + +struct tegra_dc_soc_info { + bool supports_border_color; + bool supports_interlacing; + bool supports_cursor; + bool supports_block_linear; + unsigned int pitch_align; + bool has_powergate; + bool broken_reset; +}; + +struct tegra_dc { + struct host1x_client client; + struct host1x_syncpt *syncpt; + struct device *dev; + spinlock_t lock; + + struct drm_crtc base; + unsigned int powergate; + int pipe; + + struct clk *clk; + struct reset_control *rst; + void __iomem *regs; + int irq; + + struct tegra_output *rgb; + + struct tegra_dc_stats stats; + struct list_head list; + + struct drm_info_list *debugfs_files; + struct drm_minor *minor; + struct dentry *debugfs; + + /* page-flip handling */ + struct drm_pending_vblank_event *event; + + const struct tegra_dc_soc_info *soc; + + struct iommu_domain *domain; +}; + +static inline struct tegra_dc * +host1x_client_to_dc(struct 
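Several probe paths in this series (dc, hdmi, sor, vic) drop the of_match_node()/of_match_device() boilerplate in favour of of_device_get_match_data(), which returns the .data pointer of the matching of_device_id entry directly. A minimal sketch of the resulting shape, with a made-up driver and SoC-info struct; the real conversions above simply assign the return value without a NULL check.

#include <linux/of_device.h>
#include <linux/platform_device.h>

struct example_soc_info {		/* hypothetical per-SoC data */
	unsigned int pitch_align;
};

static int example_probe(struct platform_device *pdev)
{
	const struct example_soc_info *soc;

	/* Replaces: id = of_match_node(table, pdev->dev.of_node);
	 *           soc = id->data; */
	soc = of_device_get_match_data(&pdev->dev);
	if (!soc)
		return -ENODEV;

	/* ... use soc->pitch_align etc. for per-SoC configuration ... */
	return 0;
}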
host1x_client *client) +{ + return container_of(client, struct tegra_dc, client); +} + +static inline struct tegra_dc *to_tegra_dc(struct drm_crtc *crtc) +{ + return crtc ? container_of(crtc, struct tegra_dc, base) : NULL; +} + +static inline void tegra_dc_writel(struct tegra_dc *dc, u32 value, + unsigned int offset) +{ + trace_dc_writel(dc->dev, offset, value); + writel(value, dc->regs + (offset << 2)); +} + +static inline u32 tegra_dc_readl(struct tegra_dc *dc, unsigned int offset) +{ + u32 value = readl(dc->regs + (offset << 2)); + + trace_dc_readl(dc->dev, offset, value); + + return value; +} + +struct tegra_dc_window { + struct { + unsigned int x; + unsigned int y; + unsigned int w; + unsigned int h; + } src; + struct { + unsigned int x; + unsigned int y; + unsigned int w; + unsigned int h; + } dst; + unsigned int bits_per_pixel; + unsigned int stride[2]; + unsigned long base[3]; + bool bottom_up; + + struct tegra_bo_tiling tiling; + u32 format; + u32 swap; +}; + +/* from dc.c */ +void tegra_dc_commit(struct tegra_dc *dc); +int tegra_dc_state_setup_clock(struct tegra_dc *dc, + struct drm_crtc_state *crtc_state, + struct clk *clk, unsigned long pclk, + unsigned int div); + +/* from rgb.c */ +int tegra_dc_rgb_probe(struct tegra_dc *dc); +int tegra_dc_rgb_remove(struct tegra_dc *dc); +int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc); +int tegra_dc_rgb_exit(struct tegra_dc *dc); + #define DC_CMD_GENERAL_INCR_SYNCPT 0x000 #define DC_CMD_GENERAL_INCR_SYNCPT_CNTRL 0x001 #define SYNCPT_CNTRL_NO_STALL (1 << 8) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 597d563d636a..52552b9b89ef 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) order = __ffs(tegra->domain->pgsize_bitmap); init_iova_domain(&tegra->carveout.domain, 1UL << order, - carveout_start >> order, - carveout_end >> order); + carveout_start >> order); tegra->carveout.shift = iova_shift(&tegra->carveout.domain); tegra->carveout.limit = carveout_end >> tegra->carveout.shift; @@ -386,12 +385,10 @@ int tegra_drm_submit(struct tegra_drm_context *context, unsigned int num_cmdbufs = args->num_cmdbufs; unsigned int num_relocs = args->num_relocs; unsigned int num_waitchks = args->num_waitchks; - struct drm_tegra_cmdbuf __user *cmdbufs = - (void __user *)(uintptr_t)args->cmdbufs; - struct drm_tegra_reloc __user *relocs = - (void __user *)(uintptr_t)args->relocs; - struct drm_tegra_waitchk __user *waitchks = - (void __user *)(uintptr_t)args->waitchks; + struct drm_tegra_cmdbuf __user *user_cmdbufs; + struct drm_tegra_reloc __user *user_relocs; + struct drm_tegra_waitchk __user *user_waitchks; + struct drm_tegra_syncpt __user *user_syncpt; struct drm_tegra_syncpt syncpt; struct host1x *host1x = dev_get_drvdata(drm->dev->parent); struct drm_gem_object **refs; @@ -400,6 +397,11 @@ int tegra_drm_submit(struct tegra_drm_context *context, unsigned int num_refs; int err; + user_cmdbufs = u64_to_user_ptr(args->cmdbufs); + user_relocs = u64_to_user_ptr(args->relocs); + user_waitchks = u64_to_user_ptr(args->waitchks); + user_syncpt = u64_to_user_ptr(args->syncpts); + /* We don't yet support other than one syncpt_incr struct per submit */ if (args->num_syncpts != 1) return -EINVAL; @@ -440,7 +442,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, struct tegra_bo *obj; u64 offset; - if (copy_from_user(&cmdbuf, cmdbufs, sizeof(cmdbuf))) { + if (copy_from_user(&cmdbuf, user_cmdbufs, 
sizeof(cmdbuf))) { err = -EFAULT; goto fail; } @@ -476,7 +478,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, host1x_job_add_gather(job, bo, cmdbuf.words, cmdbuf.offset); num_cmdbufs--; - cmdbufs++; + user_cmdbufs++; } /* copy and resolve relocations from submit */ @@ -485,7 +487,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, struct tegra_bo *obj; err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs], - &relocs[num_relocs], drm, + &user_relocs[num_relocs], drm, file); if (err < 0) goto fail; @@ -519,9 +521,8 @@ int tegra_drm_submit(struct tegra_drm_context *context, struct host1x_waitchk *wait = &job->waitchk[num_waitchks]; struct tegra_bo *obj; - err = host1x_waitchk_copy_from_user(wait, - &waitchks[num_waitchks], - file); + err = host1x_waitchk_copy_from_user( + wait, &user_waitchks[num_waitchks], file); if (err < 0) goto fail; @@ -539,8 +540,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, } } - if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts, - sizeof(syncpt))) { + if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) { err = -EFAULT; goto fail; } @@ -1317,6 +1317,7 @@ static const struct of_device_id host1x_drm_subdevs[] = { { .compatible = "nvidia,tegra210-sor", }, { .compatible = "nvidia,tegra210-sor1", }, { .compatible = "nvidia,tegra210-vic", }, + { .compatible = "nvidia,tegra186-vic", }, { /* sentinel */ } }; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 063f5d397526..ddae331ad8b6 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -119,104 +119,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *iova); void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, dma_addr_t iova); -struct tegra_dc_soc_info; -struct tegra_output; - -struct tegra_dc_stats { - unsigned long frames; - unsigned long vblank; - unsigned long underflow; - unsigned long overflow; -}; - -struct tegra_dc { - struct host1x_client client; - struct host1x_syncpt *syncpt; - struct device *dev; - spinlock_t lock; - - struct drm_crtc base; - unsigned int powergate; - int pipe; - - struct clk *clk; - struct reset_control *rst; - void __iomem *regs; - int irq; - - struct tegra_output *rgb; - - struct tegra_dc_stats stats; - struct list_head list; - - struct drm_info_list *debugfs_files; - struct drm_minor *minor; - struct dentry *debugfs; - - /* page-flip handling */ - struct drm_pending_vblank_event *event; - - const struct tegra_dc_soc_info *soc; - - struct iommu_domain *domain; -}; - -static inline struct tegra_dc * -host1x_client_to_dc(struct host1x_client *client) -{ - return container_of(client, struct tegra_dc, client); -} - -static inline struct tegra_dc *to_tegra_dc(struct drm_crtc *crtc) -{ - return crtc ? 
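The submit path above now converts the 64-bit addresses carried in the ioctl arguments with u64_to_user_ptr() instead of open-coded (void __user *)(uintptr_t) casts, which keeps the __user annotation intact and avoids truncation warnings on 32-bit builds. A sketch of the idiom with an invented argument struct; the field and struct names are assumptions.

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct example_cmdbuf {			/* hypothetical per-entry payload */
	__u32 handle;
	__u32 offset;
	__u32 words;
};

struct example_args {			/* hypothetical ioctl argument block */
	__u64 cmdbufs;			/* user-space address of an array */
	__u32 num_cmdbufs;
};

static int example_copy_cmdbufs(const struct example_args *args,
				struct example_cmdbuf *out)
{
	struct example_cmdbuf __user *user_cmdbufs =
		u64_to_user_ptr(args->cmdbufs);
	unsigned int i;

	for (i = 0; i < args->num_cmdbufs; i++)
		if (copy_from_user(&out[i], &user_cmdbufs[i], sizeof(out[i])))
			return -EFAULT;

	return 0;
}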
container_of(crtc, struct tegra_dc, base) : NULL; -} - -static inline void tegra_dc_writel(struct tegra_dc *dc, u32 value, - unsigned int offset) -{ - trace_dc_writel(dc->dev, offset, value); - writel(value, dc->regs + (offset << 2)); -} - -static inline u32 tegra_dc_readl(struct tegra_dc *dc, unsigned int offset) -{ - u32 value = readl(dc->regs + (offset << 2)); - - trace_dc_readl(dc->dev, offset, value); - - return value; -} - -struct tegra_dc_window { - struct { - unsigned int x; - unsigned int y; - unsigned int w; - unsigned int h; - } src; - struct { - unsigned int x; - unsigned int y; - unsigned int w; - unsigned int h; - } dst; - unsigned int bits_per_pixel; - unsigned int stride[2]; - unsigned long base[3]; - bool bottom_up; - - struct tegra_bo_tiling tiling; - u32 format; - u32 swap; -}; - -/* from dc.c */ -void tegra_dc_commit(struct tegra_dc *dc); -int tegra_dc_state_setup_clock(struct tegra_dc *dc, - struct drm_crtc_state *crtc_state, - struct clk *clk, unsigned long pclk, - unsigned int div); +struct cec_notifier; struct tegra_output { struct device_node *of_node; @@ -225,6 +128,7 @@ struct tegra_output { struct drm_panel *panel; struct i2c_adapter *ddc; const struct edid *edid; + struct cec_notifier *notifier; unsigned int hpd_irq; int hpd_gpio; enum of_gpio_flags hpd_gpio_flags; @@ -243,12 +147,6 @@ static inline struct tegra_output *connector_to_output(struct drm_connector *c) return container_of(c, struct tegra_output, connector); } -/* from rgb.c */ -int tegra_dc_rgb_probe(struct tegra_dc *dc); -int tegra_dc_rgb_remove(struct tegra_dc *dc); -int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc); -int tegra_dc_rgb_exit(struct tegra_dc *dc); - /* from output.c */ int tegra_output_probe(struct tegra_output *output); void tegra_output_remove(struct tegra_output *output); diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index 6ea070da7718..9a8ea93016a9 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -36,7 +36,7 @@ static int gr2d_init(struct host1x_client *client) if (!gr2d->channel) return -ENOMEM; - client->syncpts[0] = host1x_syncpt_request(client->dev, flags); + client->syncpts[0] = host1x_syncpt_request(client, flags); if (!client->syncpts[0]) { host1x_channel_put(gr2d->channel); return -ENOMEM; diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index cee2ab645cde..28c4ef63065b 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -46,7 +46,7 @@ static int gr3d_init(struct host1x_client *client) if (!gr3d->channel) return -ENOMEM; - client->syncpts[0] = host1x_syncpt_request(client->dev, flags); + client->syncpts[0] = host1x_syncpt_request(client, flags); if (!client->syncpts[0]) { host1x_channel_put(gr3d->channel); return -ENOMEM; diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 5b9d83b71943..6434b3d3d1ba 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -11,6 +11,7 @@ #include <linux/debugfs.h> #include <linux/gpio.h> #include <linux/hdmi.h> +#include <linux/of_device.h> #include <linux/pm_runtime.h> #include <linux/regulator/consumer.h> #include <linux/reset.h> @@ -21,6 +22,8 @@ #include <sound/hda_verbs.h> +#include <media/cec-notifier.h> + #include "hdmi.h" #include "drm.h" #include "dc.h" @@ -1663,20 +1666,15 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data) static int tegra_hdmi_probe(struct platform_device *pdev) { - const struct of_device_id *match; struct tegra_hdmi *hdmi; struct 
resource *regs; int err; - match = of_match_node(tegra_hdmi_of_match, pdev->dev.of_node); - if (!match) - return -ENODEV; - hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); if (!hdmi) return -ENOMEM; - hdmi->config = match->data; + hdmi->config = of_device_get_match_data(&pdev->dev); hdmi->dev = &pdev->dev; hdmi->audio_source = AUTO; @@ -1725,6 +1723,10 @@ static int tegra_hdmi_probe(struct platform_device *pdev) return PTR_ERR(hdmi->vdd); } + hdmi->output.notifier = cec_notifier_get(&pdev->dev); + if (hdmi->output.notifier == NULL) + return -ENOMEM; + hdmi->output.dev = &pdev->dev; err = tegra_output_probe(&hdmi->output); @@ -1783,6 +1785,9 @@ static int tegra_hdmi_remove(struct platform_device *pdev) tegra_output_remove(&hdmi->output); + if (hdmi->output.notifier) + cec_notifier_put(hdmi->output.notifier); + return 0; } diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 595d1ec3e02e..1cfbacea8113 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -11,6 +11,8 @@ #include <drm/drm_panel.h> #include "drm.h" +#include <media/cec-notifier.h> + int tegra_output_connector_get_modes(struct drm_connector *connector) { struct tegra_output *output = connector_to_output(connector); @@ -32,6 +34,7 @@ int tegra_output_connector_get_modes(struct drm_connector *connector) else if (output->ddc) edid = drm_get_edid(connector, output->ddc); + cec_notifier_set_phys_addr_from_edid(output->notifier, edid); drm_mode_connector_update_edid_property(connector, edid); if (edid) { @@ -68,6 +71,9 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force) status = connector_status_connected; } + if (status != connector_status_connected) + cec_notifier_phys_addr_invalidate(output->notifier); + return status; } diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 7ab1d1dc7cd7..4bcacd3f4861 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -2536,20 +2536,17 @@ MODULE_DEVICE_TABLE(of, tegra_sor_of_match); static int tegra_sor_probe(struct platform_device *pdev) { - const struct of_device_id *match; struct device_node *np; struct tegra_sor *sor; struct resource *regs; int err; - match = of_match_device(tegra_sor_of_match, &pdev->dev); - sor = devm_kzalloc(&pdev->dev, sizeof(*sor), GFP_KERNEL); if (!sor) return -ENOMEM; + sor->soc = of_device_get_match_data(&pdev->dev); sor->output.dev = sor->dev = &pdev->dev; - sor->soc = match->data; sor->settings = devm_kmemdup(&pdev->dev, sor->soc->settings, sor->soc->num_settings * diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 2448229fa653..18024183aa2b 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -167,7 +167,7 @@ static int vic_init(struct host1x_client *client) goto detach_device; } - client->syncpts[0] = host1x_syncpt_request(client->dev, 0); + client->syncpts[0] = host1x_syncpt_request(client, 0); if (!client->syncpts[0]) { err = -ENOMEM; goto free_channel; @@ -270,29 +270,33 @@ static const struct vic_config vic_t210_config = { .firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE, }; +#define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin" + +static const struct vic_config vic_t186_config = { + .firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE, +}; + static const struct of_device_id vic_match[] = { { .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config }, { .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config }, + { .compatible = "nvidia,tegra186-vic", .data 
= &vic_t186_config }, { }, }; static int vic_probe(struct platform_device *pdev) { - struct vic_config *vic_config = NULL; struct device *dev = &pdev->dev; struct host1x_syncpt **syncpts; struct resource *regs; - const struct of_device_id *match; struct vic *vic; int err; - match = of_match_device(vic_match, dev); - vic_config = (struct vic_config *)match->data; - vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL); if (!vic) return -ENOMEM; + vic->config = of_device_get_match_data(dev); + syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL); if (!syncpts) return -ENOMEM; @@ -321,7 +325,7 @@ static int vic_probe(struct platform_device *pdev) if (err < 0) return err; - err = falcon_read_firmware(&vic->falcon, vic_config->firmware); + err = falcon_read_firmware(&vic->falcon, vic->config->firmware); if (err < 0) goto exit_falcon; @@ -334,7 +338,6 @@ static int vic_probe(struct platform_device *pdev) vic->client.base.syncpts = syncpts; vic->client.base.num_syncpts = 1; vic->dev = dev; - vic->config = vic_config; INIT_LIST_HEAD(&vic->client.list); vic->client.ops = &vic_ops; @@ -405,3 +408,6 @@ MODULE_FIRMWARE(NVIDIA_TEGRA_124_VIC_FIRMWARE); #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) MODULE_FIRMWARE(NVIDIA_TEGRA_210_VIC_FIRMWARE); #endif +#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC) +MODULE_FIRMWARE(NVIDIA_TEGRA_186_VIC_FIRMWARE); +#endif diff --git a/drivers/gpu/drm/tilcdc/Makefile b/drivers/gpu/drm/tilcdc/Makefile index 55ebd516728f..b9e1108e5b4e 100644 --- a/drivers/gpu/drm/tilcdc/Makefile +++ b/drivers/gpu/drm/tilcdc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 ifeq (, $(findstring -W,$(EXTRA_CFLAGS))) ccflags-y += -Werror endif diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index d2589f310437..6ef4d1a1e3a9 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -24,6 +24,7 @@ #include <linux/completion.h> #include <linux/dma-mapping.h> #include <linux/of_graph.h> +#include <linux/math64.h> #include "tilcdc_drv.h" #include "tilcdc_regs.h" @@ -48,6 +49,7 @@ struct tilcdc_crtc { unsigned int lcd_fck_rate; ktime_t last_vblank; + unsigned int hvtotal_us; struct drm_framebuffer *curr_fb; struct drm_framebuffer *next_fb; @@ -292,6 +294,12 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc) LCDC_V2_CORE_CLK_EN); } +uint tilcdc_mode_hvtotal(const struct drm_display_mode *mode) +{ + return (uint) div_u64(1000llu * mode->htotal * mode->vtotal, + mode->clock); +} + static void tilcdc_crtc_set_mode(struct drm_crtc *crtc) { struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); @@ -459,6 +467,9 @@ static void tilcdc_crtc_set_mode(struct drm_crtc *crtc) drm_framebuffer_get(fb); crtc->hwmode = crtc->state->adjusted_mode; + + tilcdc_crtc->hvtotal_us = + tilcdc_mode_hvtotal(&crtc->hwmode); } static void tilcdc_crtc_enable(struct drm_crtc *crtc) @@ -467,7 +478,6 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); unsigned long flags; - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); mutex_lock(&tilcdc_crtc->enable_lock); if (tilcdc_crtc->enabled || tilcdc_crtc->shutdown) { mutex_unlock(&tilcdc_crtc->enable_lock); @@ -564,7 +574,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown) static void tilcdc_crtc_disable(struct drm_crtc *crtc) { - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); tilcdc_crtc_off(crtc, false); } @@ -608,9 +617,7 @@ static void tilcdc_crtc_destroy(struct drm_crtc *crtc) struct tilcdc_crtc *tilcdc_crtc = 
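The tilcdc change above derives the frame period from the full mode timings rather than 1000000 / vrefresh: with mode->clock in kHz, one frame takes htotal * vtotal pixel clocks, so the period in microseconds is 1000 * htotal * vtotal / clock. A quick worked check of that formula with illustrative CEA 1080p60 timings (htotal 2200, vtotal 1125, clock 148500 kHz), which is not code taken from the driver.

#include <linux/math64.h>

static unsigned int example_frame_period_us(void)
{
	u64 pixels = 1000llu * 2200 * 1125;	/* pixel clocks per frame, x1000 */

	/* ~16666 us per frame, i.e. roughly 60 Hz */
	return (unsigned int)div_u64(pixels, 148500);
}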
to_tilcdc_crtc(crtc); struct tilcdc_drm_private *priv = crtc->dev->dev_private; - drm_modeset_lock(&crtc->mutex, NULL); - tilcdc_crtc_disable(crtc); - drm_modeset_unlock(&crtc->mutex); + tilcdc_crtc_shutdown(crtc); flush_workqueue(priv->wq); @@ -626,8 +633,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); struct drm_device *dev = crtc->dev; - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); - if (tilcdc_crtc->event) { dev_err(dev->dev, "already pending page flip!\n"); return -EBUSY; @@ -648,7 +653,7 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, - 1000000 / crtc->hwmode.vrefresh); + tilcdc_crtc->hvtotal_us); tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get())); if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US) @@ -728,11 +733,39 @@ static void tilcdc_crtc_disable_vblank(struct drm_crtc *crtc) { } +static void tilcdc_crtc_reset(struct drm_crtc *crtc) +{ + struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); + struct drm_device *dev = crtc->dev; + int ret; + + drm_atomic_helper_crtc_reset(crtc); + + /* Turn the raster off if it for some reason is on. */ + pm_runtime_get_sync(dev->dev); + if (tilcdc_read(dev, LCDC_RASTER_CTRL_REG) & LCDC_RASTER_ENABLE) { + /* Enable DMA Frame Done Interrupt */ + tilcdc_write(dev, LCDC_INT_ENABLE_SET_REG, LCDC_FRAME_DONE); + tilcdc_clear_irqstatus(dev, 0xffffffff); + + tilcdc_crtc->frame_done = false; + tilcdc_clear(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE); + + ret = wait_event_timeout(tilcdc_crtc->frame_done_wq, + tilcdc_crtc->frame_done, + msecs_to_jiffies(500)); + if (ret == 0) + dev_err(dev->dev, "%s: timeout waiting for framedone\n", + __func__); + } + pm_runtime_put_sync(dev->dev); +} + static const struct drm_crtc_funcs tilcdc_crtc_funcs = { .destroy = tilcdc_crtc_destroy, .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, - .reset = drm_atomic_helper_crtc_reset, + .reset = tilcdc_crtc_reset, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, .enable_vblank = tilcdc_crtc_enable_vblank, diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c index 1813a3623ce6..8eebb5f826a6 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c @@ -418,7 +418,7 @@ static int panel_remove(struct platform_device *pdev) return 0; } -static struct of_device_id panel_of_match[] = { +static const struct of_device_id panel_of_match[] = { { .compatible = "ti,tilcdc,panel", }, { }, }; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_slave_compat.c b/drivers/gpu/drm/tilcdc/tilcdc_slave_compat.c index 623a9140493c..d2b9e5f04724 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_slave_compat.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_slave_compat.c @@ -145,7 +145,6 @@ static struct device_node * __init tilcdc_get_overlay(struct kfree_table *kft) __dtb_tilcdc_slave_compat_begin; static void *overlay_data; struct device_node *overlay; - int ret; if (!size) { pr_warn("%s: No overlay data\n", __func__); @@ -163,13 +162,6 @@ static struct device_node * __init tilcdc_get_overlay(struct kfree_table *kft) return NULL; } - of_node_set_flag(overlay, OF_DETACHED); - ret = of_resolve_phandles(overlay); - if (ret) { - pr_err("%s: Failed to resolve phandles: %d\n", __func__, ret); - return NULL; - } - return overlay; } @@ -204,7 +196,7 @@ static void __init 
tilcdc_convert_slave_node(void) /* For all memory needed for the overlay tree. This memory can be freed after the overlay has been applied. */ struct kfree_table kft; - int ret; + int ovcs_id, ret; if (kfree_table_init(&kft)) return; @@ -247,9 +239,11 @@ static void __init tilcdc_convert_slave_node(void) tilcdc_node_disable(slave); - ret = of_overlay_create(overlay); + ovcs_id = 0; + ret = of_overlay_apply(overlay, &ovcs_id); if (ret) - pr_err("%s: Creating overlay failed: %d\n", __func__, ret); + pr_err("%s: Applying overlay changeset failed: %d\n", + __func__, ret); else pr_info("%s: ti,tilcdc,slave node successfully converted\n", __func__); diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c index 1e2dfb1b1d6b..7e3643462a08 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c @@ -289,8 +289,6 @@ static const struct tilcdc_module_ops tfp410_module_ops = { * Device: */ -static struct of_device_id tfp410_of_match[]; - static int tfp410_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; @@ -375,7 +373,7 @@ static int tfp410_remove(struct platform_device *pdev) return 0; } -static struct of_device_id tfp410_of_match[] = { +static const struct of_device_id tfp410_of_match[] = { { .compatible = "ti,tilcdc,tfp410", }, { }, }; diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile index 4d0c938ff4b2..a60e560804e0 100644 --- a/drivers/gpu/drm/ttm/Makefile +++ b/drivers/gpu/drm/ttm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the drm device driver. This driver provides support for the diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d79607a1187c..c088703777e2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -150,8 +150,7 @@ static void ttm_bo_release_list(struct kref *list_kref) ttm_tt_destroy(bo->ttm); atomic_dec(&bo->glob->bo_count); dma_fence_put(bo->moving); - if (bo->resv == &bo->ttm_resv) - reservation_object_fini(&bo->ttm_resv); + reservation_object_fini(&bo->ttm_resv); mutex_destroy(&bo->wu_mutex); if (bo->destroy) bo->destroy(bo); @@ -402,14 +401,11 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) if (bo->resv == &bo->ttm_resv) return 0; - reservation_object_init(&bo->ttm_resv); BUG_ON(!reservation_object_trylock(&bo->ttm_resv)); r = reservation_object_copy_fences(&bo->ttm_resv, bo->resv); - if (r) { + if (r) reservation_object_unlock(&bo->ttm_resv); - reservation_object_fini(&bo->ttm_resv); - } return r; } @@ -459,6 +455,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) spin_unlock(&glob->lru_lock); if (bo->resv != &bo->ttm_resv) reservation_object_unlock(&bo->ttm_resv); + ttm_bo_cleanup_memtype_use(bo); return; } @@ -557,8 +554,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, } ttm_bo_del_from_lru(bo); - if (!list_empty(&bo->ddestroy) && (bo->resv != &bo->ttm_resv)) - reservation_object_fini(&bo->ttm_resv); list_del_init(&bo->ddestroy); kref_put(&bo->list_kref, ttm_bo_ref_bug); @@ -1207,8 +1202,8 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, lockdep_assert_held(&bo->resv->lock.base); } else { bo->resv = &bo->ttm_resv; - reservation_object_init(&bo->ttm_resv); } + reservation_object_init(&bo->ttm_resv); atomic_inc(&bo->glob->bo_count); drm_vma_node_reset(&bo->vma_node); bo->priority = 0; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 
78cb99be7146..e7a519f1849b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -474,6 +474,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, INIT_LIST_HEAD(&fbo->lru); INIT_LIST_HEAD(&fbo->swap); INIT_LIST_HEAD(&fbo->io_reserve_lru); + mutex_init(&fbo->wu_mutex); fbo->moving = NULL; drm_vma_node_reset(&fbo->vma_node); atomic_set(&fbo->cpu_writers, 0); diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index e11fd76e06f4..316f831ad5f0 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -95,7 +95,7 @@ struct ttm_pool_opts { unsigned small; }; -#define NUM_POOLS 4 +#define NUM_POOLS 6 /** * struct ttm_pool_manager - Holds memory pools for fst allocation @@ -122,6 +122,8 @@ struct ttm_pool_manager { struct ttm_page_pool uc_pool; struct ttm_page_pool wc_pool_dma32; struct ttm_page_pool uc_pool_dma32; + struct ttm_page_pool wc_pool_huge; + struct ttm_page_pool uc_pool_huge; } ; }; }; @@ -256,8 +258,8 @@ static int set_pages_array_uc(struct page **pages, int addrinarray) /** * Select the right pool or requested caching state and ttm flags. */ -static struct ttm_page_pool *ttm_get_pool(int flags, - enum ttm_caching_state cstate) +static struct ttm_page_pool *ttm_get_pool(int flags, bool huge, + enum ttm_caching_state cstate) { int pool_index; @@ -269,9 +271,15 @@ static struct ttm_page_pool *ttm_get_pool(int flags, else pool_index = 0x1; - if (flags & TTM_PAGE_FLAG_DMA32) + if (flags & TTM_PAGE_FLAG_DMA32) { + if (huge) + return NULL; pool_index |= 0x2; + } else if (huge) { + pool_index |= 0x4; + } + return &_manager->pools[pool_index]; } @@ -321,7 +329,7 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free, pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), GFP_KERNEL); if (!pages_to_free) { - pr_err("Failed to allocate memory for pool free operation\n"); + pr_debug("Failed to allocate memory for pool free operation\n"); return 0; } @@ -494,12 +502,14 @@ static void ttm_handle_caching_state_failure(struct list_head *pages, * pages returned in pages array. 
*/ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, - int ttm_flags, enum ttm_caching_state cstate, unsigned count) + int ttm_flags, enum ttm_caching_state cstate, + unsigned count, unsigned order) { struct page **caching_array; struct page *p; int r = 0; - unsigned i, cpages; + unsigned i, j, cpages; + unsigned npages = 1 << order; unsigned max_cpages = min(count, (unsigned)(PAGE_SIZE/sizeof(struct page *))); @@ -507,15 +517,15 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); if (!caching_array) { - pr_err("Unable to allocate table for new pages\n"); + pr_debug("Unable to allocate table for new pages\n"); return -ENOMEM; } for (i = 0, cpages = 0; i < count; ++i) { - p = alloc_page(gfp_flags); + p = alloc_pages(gfp_flags, order); if (!p) { - pr_err("Unable to get page %u\n", i); + pr_debug("Unable to get page %u\n", i); /* store already allocated pages in the pool after * setting the caching state */ @@ -531,14 +541,18 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, goto out; } + list_add(&p->lru, pages); + #ifdef CONFIG_HIGHMEM /* gfp flags of highmem page should never be dma32 so we * we should be fine in such case */ - if (!PageHighMem(p)) + if (PageHighMem(p)) + continue; + #endif - { - caching_array[cpages++] = p; + for (j = 0; j < npages; ++j) { + caching_array[cpages++] = p++; if (cpages == max_cpages) { r = ttm_set_pages_caching(caching_array, @@ -552,8 +566,6 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, cpages = 0; } } - - list_add(&p->lru, pages); } if (cpages) { @@ -573,9 +585,9 @@ out: * Fill the given pool if there aren't enough pages and the requested number of * pages is small. */ -static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, - int ttm_flags, enum ttm_caching_state cstate, unsigned count, - unsigned long *irq_flags) +static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags, + enum ttm_caching_state cstate, + unsigned count, unsigned long *irq_flags) { struct page *p; int r; @@ -605,7 +617,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, INIT_LIST_HEAD(&new_pages); r = ttm_alloc_new_pages(&new_pages, pool->gfp_flags, ttm_flags, - cstate, alloc_size); + cstate, alloc_size, 0); spin_lock_irqsave(&pool->lock, *irq_flags); if (!r) { @@ -613,7 +625,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, ++pool->nrefills; pool->npages += alloc_size; } else { - pr_err("Failed to fill pool (%p)\n", pool); + pr_debug("Failed to fill pool (%p)\n", pool); /* If we have any pages left put them to the pool. */ list_for_each_entry(p, &new_pages, lru) { ++cpages; @@ -627,22 +639,25 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, } /** - * Cut 'count' number of pages from the pool and put them on the return list. + * Allocate pages from the pool and put them on the return list. * - * @return count of pages still required to fulfill the request. + * @return zero for success or negative error code. 
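ttm_alloc_new_pages() now takes an allocation order: a single alloc_pages(gfp, order) call returns 2^order physically contiguous pages whose struct page entries are consecutive, so the pool code can fan one block out into its page array by pointer increments. A stripped-down sketch of that fan-out under the same assumption; error paths and caching-state handling are omitted.

#include <linux/gfp.h>

/* Allocate one block of 2^order pages and expose it as individual pages.
 * The matching release would be __free_pages(pages[0], order). */
static int example_alloc_block(struct page **pages, unsigned int order)
{
	struct page *p = alloc_pages(GFP_HIGHUSER, order);
	unsigned int i;

	if (!p)
		return -ENOMEM;

	for (i = 0; i < (1u << order); i++)
		pages[i] = p + i;	/* struct pages of the block are consecutive */

	return 0;
}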
*/ -static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool, - struct list_head *pages, - int ttm_flags, - enum ttm_caching_state cstate, - unsigned count) +static int ttm_page_pool_get_pages(struct ttm_page_pool *pool, + struct list_head *pages, + int ttm_flags, + enum ttm_caching_state cstate, + unsigned count, unsigned order) { unsigned long irq_flags; struct list_head *p; unsigned i; + int r = 0; spin_lock_irqsave(&pool->lock, irq_flags); - ttm_page_pool_fill_locked(pool, ttm_flags, cstate, count, &irq_flags); + if (!order) + ttm_page_pool_fill_locked(pool, ttm_flags, cstate, count, + &irq_flags); if (count >= pool->npages) { /* take all pages from the pool */ @@ -672,32 +687,126 @@ static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool, count = 0; out: spin_unlock_irqrestore(&pool->lock, irq_flags); - return count; + + /* clear the pages coming from the pool if requested */ + if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC) { + struct page *page; + + list_for_each_entry(page, pages, lru) { + if (PageHighMem(page)) + clear_highpage(page); + else + clear_page(page_address(page)); + } + } + + /* If pool didn't have enough pages allocate new one. */ + if (count) { + gfp_t gfp_flags = pool->gfp_flags; + + /* set zero flag for page allocation if required */ + if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC) + gfp_flags |= __GFP_ZERO; + + /* ttm_alloc_new_pages doesn't reference pool so we can run + * multiple requests in parallel. + **/ + r = ttm_alloc_new_pages(pages, gfp_flags, ttm_flags, cstate, + count, order); + } + + return r; } /* Put all pages in pages list to correct pool to wait for reuse */ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, enum ttm_caching_state cstate) { + struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct ttm_page_pool *huge = ttm_get_pool(flags, true, cstate); +#endif unsigned long irq_flags; - struct ttm_page_pool *pool = ttm_get_pool(flags, cstate); unsigned i; if (pool == NULL) { /* No pool for this memory type so free the pages */ - for (i = 0; i < npages; i++) { - if (pages[i]) { - if (page_count(pages[i]) != 1) - pr_err("Erroneous page count. Leaking pages.\n"); - __free_page(pages[i]); - pages[i] = NULL; + i = 0; + while (i < npages) { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct page *p = pages[i]; +#endif + unsigned order = 0, j; + + if (!pages[i]) { + ++i; + continue; + } + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + for (j = 0; j < HPAGE_PMD_NR; ++j) + if (p++ != pages[i + j]) + break; + + if (j == HPAGE_PMD_NR) + order = HPAGE_PMD_ORDER; +#endif + + if (page_count(pages[i]) != 1) + pr_err("Erroneous page count. 
Leaking pages.\n"); + __free_pages(pages[i], order); + + j = 1 << order; + while (j) { + pages[i++] = NULL; + --j; } } return; } + i = 0; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (huge) { + unsigned max_size, n2free; + + spin_lock_irqsave(&huge->lock, irq_flags); + while (i < npages) { + struct page *p = pages[i]; + unsigned j; + + if (!p) + break; + + for (j = 0; j < HPAGE_PMD_NR; ++j) + if (p++ != pages[i + j]) + break; + + if (j != HPAGE_PMD_NR) + break; + + list_add_tail(&pages[i]->lru, &huge->list); + + for (j = 0; j < HPAGE_PMD_NR; ++j) + pages[i++] = NULL; + huge->npages++; + } + + /* Check that we don't go over the pool limit */ + max_size = _manager->options.max_size; + max_size /= HPAGE_PMD_NR; + if (huge->npages > max_size) + n2free = huge->npages - max_size; + else + n2free = 0; + spin_unlock_irqrestore(&huge->lock, irq_flags); + if (n2free) + ttm_page_pool_free(huge, n2free, false); + } +#endif + spin_lock_irqsave(&pool->lock, irq_flags); - for (i = 0; i < npages; i++) { + while (i < npages) { if (pages[i]) { if (page_count(pages[i]) != 1) pr_err("Erroneous page count. Leaking pages.\n"); @@ -705,6 +814,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, pages[i] = NULL; pool->npages++; } + ++i; } /* Check that we don't go over the pool limit */ npages = 0; @@ -727,75 +837,96 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, static int ttm_get_pages(struct page **pages, unsigned npages, int flags, enum ttm_caching_state cstate) { - struct ttm_page_pool *pool = ttm_get_pool(flags, cstate); + struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct ttm_page_pool *huge = ttm_get_pool(flags, true, cstate); +#endif struct list_head plist; struct page *p = NULL; - gfp_t gfp_flags = GFP_USER; unsigned count; int r; - /* set zero flag for page allocation if required */ - if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) - gfp_flags |= __GFP_ZERO; - /* No pool for cached pages */ if (pool == NULL) { + gfp_t gfp_flags = GFP_USER; + unsigned i; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + unsigned j; +#endif + + /* set zero flag for page allocation if required */ + if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) + gfp_flags |= __GFP_ZERO; + if (flags & TTM_PAGE_FLAG_DMA32) gfp_flags |= GFP_DMA32; else gfp_flags |= GFP_HIGHUSER; - for (r = 0; r < npages; ++r) { + i = 0; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + while (npages >= HPAGE_PMD_NR) { + gfp_t huge_flags = gfp_flags; + + huge_flags |= GFP_TRANSHUGE; + huge_flags &= ~__GFP_MOVABLE; + huge_flags &= ~__GFP_COMP; + p = alloc_pages(huge_flags, HPAGE_PMD_ORDER); + if (!p) + break; + + for (j = 0; j < HPAGE_PMD_NR; ++j) + pages[i++] = p++; + + npages -= HPAGE_PMD_NR; + } +#endif + + while (npages) { p = alloc_page(gfp_flags); if (!p) { - - pr_err("Unable to allocate page\n"); + pr_debug("Unable to allocate page\n"); return -ENOMEM; } - pages[r] = p; + pages[i++] = p; + --npages; } return 0; } - /* combine zero flag to pool flags */ - gfp_flags |= pool->gfp_flags; - - /* First we take pages from the pool */ - INIT_LIST_HEAD(&plist); - npages = ttm_page_pool_get_pages(pool, &plist, flags, cstate, npages); count = 0; - list_for_each_entry(p, &plist, lru) { - pages[count++] = p; - } - /* clear the pages coming from the pool if requested */ - if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (huge && npages >= HPAGE_PMD_NR) { + INIT_LIST_HEAD(&plist); + ttm_page_pool_get_pages(huge, &plist, flags, cstate, + npages / HPAGE_PMD_NR, + 
HPAGE_PMD_ORDER); + list_for_each_entry(p, &plist, lru) { - if (PageHighMem(p)) - clear_highpage(p); - else - clear_page(page_address(p)); + unsigned j; + + for (j = 0; j < HPAGE_PMD_NR; ++j) + pages[count++] = &p[j]; } } +#endif - /* If pool didn't have enough pages allocate new one. */ - if (npages > 0) { - /* ttm_alloc_new_pages doesn't reference pool so we can run - * multiple requests in parallel. - **/ - INIT_LIST_HEAD(&plist); - r = ttm_alloc_new_pages(&plist, gfp_flags, flags, cstate, npages); - list_for_each_entry(p, &plist, lru) { - pages[count++] = p; - } - if (r) { - /* If there is any pages in the list put them back to - * the pool. */ - pr_err("Failed to allocate extra pages for large request\n"); - ttm_put_pages(pages, count, flags, cstate); - return r; - } + INIT_LIST_HEAD(&plist); + r = ttm_page_pool_get_pages(pool, &plist, flags, cstate, + npages - count, 0); + + list_for_each_entry(p, &plist, lru) + pages[count++] = p; + + if (r) { + /* If there is any pages in the list put them back to + * the pool. + */ + pr_debug("Failed to allocate extra pages for large request\n"); + ttm_put_pages(pages, count, flags, cstate); + return r; } return 0; @@ -832,6 +963,14 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) ttm_page_pool_init_locked(&_manager->uc_pool_dma32, GFP_USER | GFP_DMA32, "uc dma"); + ttm_page_pool_init_locked(&_manager->wc_pool_huge, + GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP), + "wc huge"); + + ttm_page_pool_init_locked(&_manager->uc_pool_huge, + GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP) + , "uc huge"); + _manager->options.max_size = max_pages; _manager->options.small = SMALL_ALLOCATION; _manager->options.alloc_size = NUM_PAGES_TO_ALLOC; @@ -873,15 +1012,14 @@ int ttm_pool_populate(struct ttm_tt *ttm) if (ttm->state != tt_unpopulated) return 0; - for (i = 0; i < ttm->num_pages; ++i) { - ret = ttm_get_pages(&ttm->pages[i], 1, - ttm->page_flags, - ttm->caching_state); - if (ret != 0) { - ttm_pool_unpopulate(ttm); - return -ENOMEM; - } + ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags, + ttm->caching_state); + if (unlikely(ret != 0)) { + ttm_pool_unpopulate(ttm); + return ret; + } + for (i = 0; i < ttm->num_pages; ++i) { ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i], PAGE_SIZE); if (unlikely(ret != 0)) { @@ -908,14 +1046,14 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm) unsigned i; for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) { - ttm_mem_global_free_page(ttm->glob->mem_glob, - ttm->pages[i], PAGE_SIZE); - ttm_put_pages(&ttm->pages[i], 1, - ttm->page_flags, - ttm->caching_state); - } + if (!ttm->pages[i]) + continue; + + ttm_mem_global_free_page(ttm->glob->mem_glob, ttm->pages[i], + PAGE_SIZE); } + ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags, + ttm->caching_state); ttm->state = tt_unpopulated; } EXPORT_SYMBOL(ttm_pool_unpopulate); @@ -923,16 +1061,26 @@ EXPORT_SYMBOL(ttm_pool_unpopulate); #if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) { - unsigned i; + unsigned i, j; int r; r = ttm_pool_populate(&tt->ttm); if (r) return r; - for (i = 0; i < tt->ttm.num_pages; i++) { + for (i = 0; i < tt->ttm.num_pages; ++i) { + struct page *p = tt->ttm.pages[i]; + size_t num_pages = 1; + + for (j = i + 1; j < tt->ttm.num_pages; ++j) { + if (++p != tt->ttm.pages[j]) + break; + + ++num_pages; + } + tt->dma_address[i] = dma_map_page(dev, tt->ttm.pages[i], - 0, PAGE_SIZE, + 0, num_pages * PAGE_SIZE, 
DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, tt->dma_address[i])) { while (i--) { @@ -943,6 +1091,11 @@ int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) ttm_pool_unpopulate(&tt->ttm); return -EFAULT; } + + for (j = 1; j < num_pages; ++j) { + tt->dma_address[i + 1] = tt->dma_address[i] + PAGE_SIZE; + ++i; + } } return 0; } @@ -950,13 +1103,28 @@ EXPORT_SYMBOL(ttm_populate_and_map_pages); void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) { - unsigned i; - - for (i = 0; i < tt->ttm.num_pages; i++) { - if (tt->dma_address[i]) { - dma_unmap_page(dev, tt->dma_address[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + unsigned i, j; + + for (i = 0; i < tt->ttm.num_pages;) { + struct page *p = tt->ttm.pages[i]; + size_t num_pages = 1; + + if (!tt->dma_address[i] || !tt->ttm.pages[i]) { + ++i; + continue; } + + for (j = i + 1; j < tt->ttm.num_pages; ++j) { + if (++p != tt->ttm.pages[j]) + break; + + ++num_pages; + } + + dma_unmap_page(dev, tt->dma_address[i], num_pages * PAGE_SIZE, + DMA_BIDIRECTIONAL); + + i += num_pages; } ttm_pool_unpopulate(&tt->ttm); } @@ -972,12 +1140,12 @@ int ttm_page_alloc_debugfs(struct seq_file *m, void *data) seq_printf(m, "No pool allocator running.\n"); return 0; } - seq_printf(m, "%6s %12s %13s %8s\n", + seq_printf(m, "%7s %12s %13s %8s\n", h[0], h[1], h[2], h[3]); for (i = 0; i < NUM_POOLS; ++i) { p = &_manager->pools[i]; - seq_printf(m, "%6s %12ld %13ld %8d\n", + seq_printf(m, "%7s %12ld %13ld %8d\n", p->name, p->nrefills, p->nfrees, p->npages); } diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index e5ef10d34748..6b2627fe9bc1 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -463,7 +463,7 @@ static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free, GFP_KERNEL); if (!pages_to_free) { - pr_err("%s: Failed to allocate memory for pool free operation\n", + pr_debug("%s: Failed to allocate memory for pool free operation\n", pool->dev_name); return 0; } @@ -755,7 +755,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool, caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); if (!caching_array) { - pr_err("%s: Unable to allocate table for new pages\n", + pr_debug("%s: Unable to allocate table for new pages\n", pool->dev_name); return -ENOMEM; } @@ -768,8 +768,8 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool, for (i = 0, cpages = 0; i < count; ++i) { dma_p = __ttm_dma_alloc_page(pool); if (!dma_p) { - pr_err("%s: Unable to get page %u\n", - pool->dev_name, i); + pr_debug("%s: Unable to get page %u\n", + pool->dev_name, i); /* store already allocated pages in the pool after * setting the caching state */ @@ -855,8 +855,8 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool, struct dma_page *d_page; unsigned cpages = 0; - pr_err("%s: Failed to fill %s pool (r:%d)!\n", - pool->dev_name, pool->name, r); + pr_debug("%s: Failed to fill %s pool (r:%d)!\n", + pool->dev_name, pool->name, r); list_for_each_entry(d_page, &d_pages, page_list) { cpages++; @@ -913,6 +913,7 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge) if (huge) { gfp_flags |= GFP_TRANSHUGE; gfp_flags &= ~__GFP_MOVABLE; + gfp_flags &= ~__GFP_COMP; } return gfp_flags; diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index 091ca81658eb..c3dc1fd20cb4 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ 
b/drivers/gpu/drm/udl/udl_connector.c @@ -14,70 +14,95 @@ #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/drm_crtc_helper.h> +#include "udl_connector.h" #include "udl_drv.h" -/* dummy connector to just get EDID, - all UDL appear to have a DVI-D */ - -static u8 *udl_get_edid(struct udl_device *udl) +static bool udl_get_edid_block(struct udl_device *udl, int block_idx, + u8 *buff) { - u8 *block; - char *rbuf; int ret, i; + u8 *read_buff; - block = kmalloc(EDID_LENGTH, GFP_KERNEL); - if (block == NULL) - return NULL; - - rbuf = kmalloc(2, GFP_KERNEL); - if (rbuf == NULL) - goto error; + read_buff = kmalloc(2, GFP_KERNEL); + if (!read_buff) + return false; for (i = 0; i < EDID_LENGTH; i++) { + int bval = (i + block_idx * EDID_LENGTH) << 8; ret = usb_control_msg(udl->udev, - usb_rcvctrlpipe(udl->udev, 0), (0x02), - (0x80 | (0x02 << 5)), i << 8, 0xA1, rbuf, 2, - HZ); + usb_rcvctrlpipe(udl->udev, 0), + (0x02), (0x80 | (0x02 << 5)), bval, + 0xA1, read_buff, 2, HZ); if (ret < 1) { DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret); - goto error; + kfree(read_buff); + return false; } - block[i] = rbuf[1]; + buff[i] = read_buff[1]; } - kfree(rbuf); - return block; - -error: - kfree(block); - kfree(rbuf); - return NULL; + kfree(read_buff); + return true; } -static int udl_get_modes(struct drm_connector *connector) +static bool udl_get_edid(struct udl_device *udl, u8 **result_buff, + int *result_buff_size) { - struct udl_device *udl = connector->dev->dev_private; - struct edid *edid; - int ret; - - edid = (struct edid *)udl_get_edid(udl); - if (!edid) { - drm_mode_connector_update_edid_property(connector, NULL); - return 0; + int i, extensions; + u8 *block_buff = NULL, *buff_ptr; + + block_buff = kmalloc(EDID_LENGTH, GFP_KERNEL); + if (block_buff == NULL) + return false; + + if (udl_get_edid_block(udl, 0, block_buff) && + memchr_inv(block_buff, 0, EDID_LENGTH)) { + extensions = ((struct edid *)block_buff)->extensions; + if (extensions > 0) { + /* we have to read all extensions one by one */ + *result_buff_size = EDID_LENGTH * (extensions + 1); + *result_buff = kmalloc(*result_buff_size, GFP_KERNEL); + buff_ptr = *result_buff; + if (buff_ptr == NULL) { + kfree(block_buff); + return false; + } + memcpy(buff_ptr, block_buff, EDID_LENGTH); + kfree(block_buff); + buff_ptr += EDID_LENGTH; + for (i = 1; i < extensions; ++i) { + if (udl_get_edid_block(udl, i, buff_ptr)) { + buff_ptr += EDID_LENGTH; + } else { + kfree(*result_buff); + *result_buff = NULL; + return false; + } + } + return true; + } + /* we have only base edid block */ + *result_buff = block_buff; + *result_buff_size = EDID_LENGTH; + return true; } - /* - * We only read the main block, but if the monitor reports extension - * blocks then the drm edid code expects them to be present, so patch - * the extension count to 0. 
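The old code path being removed here patched the extension count to zero; doing that safely depends on the EDID rule that every 128-byte block must sum to 0 modulo 256, which is why the checksum byte gets adjusted by the same amount in the lines that follow. A small validity check, included only to make that invariant explicit (kernel-style C sketch, not part of the patch):

/* An EDID block is valid only if its 128 bytes sum to 0 (mod 256),
 * so lowering the 'extensions' byte by N must be balanced by raising
 * the checksum byte by N.
 */
static bool edid_block_sum_ok(const u8 *block)
{
	u8 sum = 0;
	int i;

	for (i = 0; i < EDID_LENGTH; i++)
		sum += block[i];

	return sum == 0;
}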
- */ - edid->checksum += edid->extensions; - edid->extensions = 0; - - drm_mode_connector_update_edid_property(connector, edid); - ret = drm_add_edid_modes(connector, edid); - kfree(edid); - return ret; + kfree(block_buff); + + return false; +} + +static int udl_get_modes(struct drm_connector *connector) +{ + struct udl_drm_connector *udl_connector = + container_of(connector, + struct udl_drm_connector, + connector); + + drm_mode_connector_update_edid_property(connector, udl_connector->edid); + if (udl_connector->edid) + return drm_add_edid_modes(connector, udl_connector->edid); + return 0; } static int udl_mode_valid(struct drm_connector *connector, @@ -96,8 +121,26 @@ static int udl_mode_valid(struct drm_connector *connector, static enum drm_connector_status udl_detect(struct drm_connector *connector, bool force) { - if (drm_dev_is_unplugged(connector->dev)) + u8 *edid_buff = NULL; + int edid_buff_size = 0; + struct udl_device *udl = connector->dev->dev_private; + struct udl_drm_connector *udl_connector = + container_of(connector, + struct udl_drm_connector, + connector); + + /* cleanup previous edid */ + if (udl_connector->edid != NULL) { + kfree(udl_connector->edid); + udl_connector->edid = NULL; + } + + + if (!udl_get_edid(udl, &edid_buff, &edid_buff_size)) return connector_status_disconnected; + + udl_connector->edid = (struct edid *)edid_buff; + return connector_status_connected; } @@ -117,8 +160,14 @@ static int udl_connector_set_property(struct drm_connector *connector, static void udl_connector_destroy(struct drm_connector *connector) { + struct udl_drm_connector *udl_connector = + container_of(connector, + struct udl_drm_connector, + connector); + drm_connector_unregister(connector); drm_connector_cleanup(connector); + kfree(udl_connector->edid); kfree(connector); } @@ -138,17 +187,22 @@ static const struct drm_connector_funcs udl_connector_funcs = { int udl_connector_init(struct drm_device *dev, struct drm_encoder *encoder) { + struct udl_drm_connector *udl_connector; struct drm_connector *connector; - connector = kzalloc(sizeof(struct drm_connector), GFP_KERNEL); - if (!connector) + udl_connector = kzalloc(sizeof(struct udl_drm_connector), GFP_KERNEL); + if (!udl_connector) return -ENOMEM; - drm_connector_init(dev, connector, &udl_connector_funcs, DRM_MODE_CONNECTOR_DVII); + connector = &udl_connector->connector; + drm_connector_init(dev, connector, &udl_connector_funcs, + DRM_MODE_CONNECTOR_DVII); drm_connector_helper_add(connector, &udl_connector_helper_funcs); drm_connector_register(connector); drm_mode_connector_attach_encoder(connector, encoder); + connector->polled = DRM_CONNECTOR_POLL_HPD | + DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return 0; } diff --git a/drivers/gpu/drm/udl/udl_connector.h b/drivers/gpu/drm/udl/udl_connector.h new file mode 100644 index 000000000000..0fb0db5c4612 --- /dev/null +++ b/drivers/gpu/drm/udl/udl_connector.h @@ -0,0 +1,13 @@ +#ifndef __UDL_CONNECTOR_H__ +#define __UDL_CONNECTOR_H__ + +#include <drm/drm_crtc.h> + +struct udl_drm_connector { + struct drm_connector connector; + /* last udl_detect edid */ + struct edid *edid; +}; + + +#endif //__UDL_CONNECTOR_H__ diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 31421b6b586e..3c45a3064726 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -14,6 +14,9 @@ static int udl_usb_suspend(struct usb_interface *interface, pm_message_t message) { + struct drm_device *dev = usb_get_intfdata(interface); + + 
drm_kms_helper_poll_disable(dev); return 0; } @@ -21,6 +24,7 @@ static int udl_usb_resume(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); + drm_kms_helper_poll_enable(dev); udl_modeset_restore(dev); return 0; } diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 0328b2c7b210..f1ec4528a73e 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -11,6 +11,7 @@ * more details. */ #include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> #include "udl_drv.h" /* -BULK_SIZE as per usb-skeleton. Can we get full page and avoid overhead? */ @@ -350,6 +351,8 @@ int udl_driver_load(struct drm_device *dev, unsigned long flags) if (ret) goto err_fb; + drm_kms_helper_poll_init(dev); + return 0; err_fb: udl_fbdev_cleanup(dev); @@ -371,6 +374,8 @@ void udl_driver_unload(struct drm_device *dev) { struct udl_device *udl = dev->dev_private; + drm_kms_helper_poll_fini(dev); + if (udl->urbs.count) udl_free_urb_list(dev); diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 719a771f3d5c..f5500df51686 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # Please keep these build lists sorted! # core driver code diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 3afdbf4bc10b..98a6cb9f44fc 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -53,6 +53,17 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4) vc4->bo_labels[i].size_allocated / 1024, vc4->bo_labels[i].num_allocated); } + + mutex_lock(&vc4->purgeable.lock); + if (vc4->purgeable.num) + DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache", + vc4->purgeable.size / 1024, vc4->purgeable.num); + + if (vc4->purgeable.purged_num) + DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO", + vc4->purgeable.purged_size / 1024, + vc4->purgeable.purged_num); + mutex_unlock(&vc4->purgeable.lock); } #ifdef CONFIG_DEBUG_FS @@ -75,6 +86,17 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) } mutex_unlock(&vc4->bo_lock); + mutex_lock(&vc4->purgeable.lock); + if (vc4->purgeable.num) + seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache", + vc4->purgeable.size / 1024, vc4->purgeable.num); + + if (vc4->purgeable.purged_num) + seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO", + vc4->purgeable.purged_size / 1024, + vc4->purgeable.purged_num); + mutex_unlock(&vc4->purgeable.lock); + return 0; } #endif @@ -247,6 +269,109 @@ static void vc4_bo_cache_purge(struct drm_device *dev) mutex_unlock(&vc4->bo_lock); } +void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + mutex_lock(&vc4->purgeable.lock); + list_add_tail(&bo->size_head, &vc4->purgeable.list); + vc4->purgeable.num++; + vc4->purgeable.size += bo->base.base.size; + mutex_unlock(&vc4->purgeable.lock); +} + +static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + /* list_del_init() is used here because the caller might release + * the purgeable lock in order to acquire the madv one and update the + * madv status. 
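The rationale this comment goes on to spell out rests on the difference between list_del() and list_del_init(). A minimal, self-contained illustration with a hypothetical node type (assumption: generic sketch, not vc4 code):

#include <linux/list.h>

struct node {
	struct list_head head;
};

static void demo(struct list_head *pool, struct node *n)
{
	/* list_del() poisons ->next/->prev, so removing the entry a
	 * second time would dereference LIST_POISON1/LIST_POISON2.
	 * list_del_init() leaves the entry as a valid empty list, so a
	 * later removal is a harmless no-op and list_empty() can be
	 * used to ask "is this still on a list?".
	 */
	list_del_init(&n->head);		/* safe to repeat */
	if (list_empty(&n->head))		/* detectably off-list */
		list_add_tail(&n->head, pool);
}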
+ * During this short period of time a user might decide to mark + * the BO as unpurgeable, and if bo->madv is set to + * VC4_MADV_DONTNEED it will try to remove the BO from the + * purgeable list which will fail if the ->next/prev fields + * are set to LIST_POISON1/LIST_POISON2 (which is what + * list_del() does). + * Re-initializing the list element guarantees that list_del() + * will work correctly even if it's a NOP. + */ + list_del_init(&bo->size_head); + vc4->purgeable.num--; + vc4->purgeable.size -= bo->base.base.size; +} + +void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo) +{ + struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev); + + mutex_lock(&vc4->purgeable.lock); + vc4_bo_remove_from_purgeable_pool_locked(bo); + mutex_unlock(&vc4->purgeable.lock); +} + +static void vc4_bo_purge(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + struct drm_device *dev = obj->dev; + + WARN_ON(!mutex_is_locked(&bo->madv_lock)); + WARN_ON(bo->madv != VC4_MADV_DONTNEED); + + drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); + + dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr); + bo->base.vaddr = NULL; + bo->madv = __VC4_MADV_PURGED; +} + +static void vc4_bo_userspace_cache_purge(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + mutex_lock(&vc4->purgeable.lock); + while (!list_empty(&vc4->purgeable.list)) { + struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list, + struct vc4_bo, size_head); + struct drm_gem_object *obj = &bo->base.base; + size_t purged_size = 0; + + vc4_bo_remove_from_purgeable_pool_locked(bo); + + /* Release the purgeable lock while we're purging the BO so + * that other people can continue inserting things in the + * purgeable pool without having to wait for all BOs to be + * purged. + */ + mutex_unlock(&vc4->purgeable.lock); + mutex_lock(&bo->madv_lock); + + /* Since we released the purgeable pool lock before acquiring + * the BO madv one, the user may have marked the BO as WILLNEED + * and re-used it in the meantime. + * Before purging the BO we need to make sure + * - it is still marked as DONTNEED + * - it has not been re-inserted in the purgeable list + * - it is not used by HW blocks + * If one of these conditions is not met, just skip the entry. + */ + if (bo->madv == VC4_MADV_DONTNEED && + list_empty(&bo->size_head) && + !refcount_read(&bo->usecnt)) { + purged_size = bo->base.base.size; + vc4_bo_purge(obj); + } + mutex_unlock(&bo->madv_lock); + mutex_lock(&vc4->purgeable.lock); + + if (purged_size) { + vc4->purgeable.purged_size += purged_size; + vc4->purgeable.purged_num++; + } + } + mutex_unlock(&vc4->purgeable.lock); +} + static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, uint32_t size, enum vc4_kernel_bo_type type) @@ -293,6 +418,9 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) if (!bo) return ERR_PTR(-ENOMEM); + bo->madv = VC4_MADV_WILLNEED; + refcount_set(&bo->usecnt, 0); + mutex_init(&bo->madv_lock); mutex_lock(&vc4->bo_lock); bo->label = VC4_BO_TYPE_KERNEL; vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++; @@ -330,16 +458,38 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, * CMA allocations we've got laying around and try again. */ vc4_bo_cache_purge(dev); + cma_obj = drm_gem_cma_create(dev, size); + } + if (IS_ERR(cma_obj)) { + /* + * Still not enough CMA memory, purge the userspace BO + * cache and retry. 
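Condensed, the CMA allocation fallback that this hunk and the lines that follow implement looks roughly like the sketch below; it is a restatement for readability, not the literal driver code:

/* Sketch of the vc4_bo_create() fallback ladder under CMA pressure. */
static struct drm_gem_cma_object *alloc_with_fallback(struct drm_device *dev,
						       size_t size)
{
	struct drm_gem_cma_object *cma_obj = drm_gem_cma_create(dev, size);

	if (IS_ERR(cma_obj)) {
		vc4_bo_cache_purge(dev);		/* 1st retry: kernel BO cache */
		cma_obj = drm_gem_cma_create(dev, size);
	}
	if (IS_ERR(cma_obj)) {
		vc4_bo_userspace_cache_purge(dev);	/* 2nd retry: purgeable BOs */
		cma_obj = drm_gem_cma_create(dev, size);
	}
	return cma_obj;
}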
+ * This is sub-optimal since we purge the whole userspace + * BO cache which forces user that want to re-use the BO to + * restore its initial content. + * Ideally, we should purge entries one by one and retry + * after each to see if CMA allocation succeeds. Or even + * better, try to find an entry with at least the same + * size. + */ + vc4_bo_userspace_cache_purge(dev); cma_obj = drm_gem_cma_create(dev, size); - if (IS_ERR(cma_obj)) { - DRM_ERROR("Failed to allocate from CMA:\n"); - vc4_bo_stats_dump(vc4); - return ERR_PTR(-ENOMEM); - } + } + + if (IS_ERR(cma_obj)) { + DRM_ERROR("Failed to allocate from CMA:\n"); + vc4_bo_stats_dump(vc4); + return ERR_PTR(-ENOMEM); } bo = to_vc4_bo(&cma_obj->base); + /* By default, BOs do not support the MADV ioctl. This will be enabled + * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB + * BOs). + */ + bo->madv = __VC4_MADV_NOTSUPP; + mutex_lock(&vc4->bo_lock); vc4_bo_set_label(&cma_obj->base, type); mutex_unlock(&vc4->bo_lock); @@ -365,6 +515,8 @@ int vc4_dumb_create(struct drm_file *file_priv, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); drm_gem_object_put_unlocked(&bo->base.base); @@ -403,6 +555,12 @@ void vc4_free_object(struct drm_gem_object *gem_bo) struct vc4_bo *bo = to_vc4_bo(gem_bo); struct list_head *cache_list; + /* Remove the BO from the purgeable list. */ + mutex_lock(&bo->madv_lock); + if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt)) + vc4_bo_remove_from_purgeable_pool(bo); + mutex_unlock(&bo->madv_lock); + mutex_lock(&vc4->bo_lock); /* If the object references someone else's memory, we can't cache it. */ @@ -418,7 +576,8 @@ void vc4_free_object(struct drm_gem_object *gem_bo) } /* If this object was partially constructed but CMA allocation - * had failed, just free it. + * had failed, just free it. Can also happen when the BO has been + * purged. */ if (!bo->base.vaddr) { vc4_bo_destroy(bo); @@ -437,6 +596,10 @@ void vc4_free_object(struct drm_gem_object *gem_bo) bo->validated_shader = NULL; } + /* Reset madv and usecnt before adding the BO to the cache. */ + bo->madv = __VC4_MADV_NOTSUPP; + refcount_set(&bo->usecnt, 0); + bo->t_format = false; bo->free_time = jiffies; list_add(&bo->size_head, cache_list); @@ -461,6 +624,56 @@ static void vc4_bo_cache_time_work(struct work_struct *work) mutex_unlock(&vc4->bo_lock); } +int vc4_bo_inc_usecnt(struct vc4_bo *bo) +{ + int ret; + + /* Fast path: if the BO is already retained by someone, no need to + * check the madv status. + */ + if (refcount_inc_not_zero(&bo->usecnt)) + return 0; + + mutex_lock(&bo->madv_lock); + switch (bo->madv) { + case VC4_MADV_WILLNEED: + refcount_inc(&bo->usecnt); + ret = 0; + break; + case VC4_MADV_DONTNEED: + /* We shouldn't use a BO marked as purgeable if at least + * someone else retained its content by incrementing usecnt. + * Luckily the BO hasn't been purged yet, but something wrong + * is happening here. Just throw an error instead of + * authorizing this use case. + */ + case __VC4_MADV_PURGED: + /* We can't use a purged BO. */ + default: + /* Invalid madv value. */ + ret = -EINVAL; + break; + } + mutex_unlock(&bo->madv_lock); + + return ret; +} + +void vc4_bo_dec_usecnt(struct vc4_bo *bo) +{ + /* Fast path: if the BO is still retained by someone, no need to test + * the madv value. 
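vc4_bo_inc_usecnt()/vc4_bo_dec_usecnt() use the classic lock-avoiding refcount pattern: only the transitions between 0 and 1 need the mutex, because those are the only points where the purgeable state can matter. A generic sketch of that pattern with a hypothetical object type (an illustration under stated assumptions, not the driver's exact code):

#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/refcount.h>

struct my_obj {
	refcount_t usecnt;
	struct mutex lock;
	bool reclaimable;
};

static int obj_get(struct my_obj *o)
{
	int ret = 0;

	/* Fast path: someone already holds a use reference, so the
	 * reclaim state cannot flip under us.
	 */
	if (refcount_inc_not_zero(&o->usecnt))
		return 0;

	mutex_lock(&o->lock);
	if (o->reclaimable)
		ret = -EINVAL;			/* refuse a reclaimable object */
	else if (!refcount_inc_not_zero(&o->usecnt))
		refcount_set(&o->usecnt, 1);	/* 0 -> 1 only happens under the lock */
	mutex_unlock(&o->lock);

	return ret;
}

static void obj_put(struct my_obj *o)
{
	/* Fast path: not the last user, nothing can change. */
	if (refcount_dec_not_one(&o->usecnt))
		return;

	mutex_lock(&o->lock);
	if (refcount_dec_and_test(&o->usecnt) && o->reclaimable)
		pr_debug("last user gone, object may now be reclaimed\n");
	mutex_unlock(&o->lock);
}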
+ */ + if (refcount_dec_not_one(&bo->usecnt)) + return; + + mutex_lock(&bo->madv_lock); + if (refcount_dec_and_test(&bo->usecnt) && + bo->madv == VC4_MADV_DONTNEED) + vc4_bo_add_to_purgeable_pool(bo); + mutex_unlock(&bo->madv_lock); +} + static void vc4_bo_cache_time_timer(unsigned long data) { struct drm_device *dev = (struct drm_device *)data; @@ -480,18 +693,52 @@ struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { struct vc4_bo *bo = to_vc4_bo(obj); + struct dma_buf *dmabuf; + int ret; if (bo->validated_shader) { DRM_DEBUG("Attempting to export shader BO\n"); return ERR_PTR(-EINVAL); } - return drm_gem_prime_export(dev, obj, flags); + /* Note: as soon as the BO is exported it becomes unpurgeable, because + * noone ever decrements the usecnt even if the reference held by the + * exported BO is released. This shouldn't be a problem since we don't + * expect exported BOs to be marked as purgeable. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) { + DRM_ERROR("Failed to increment BO usecnt\n"); + return ERR_PTR(ret); + } + + dmabuf = drm_gem_prime_export(dev, obj, flags); + if (IS_ERR(dmabuf)) + vc4_bo_dec_usecnt(bo); + + return dmabuf; +} + +int vc4_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct vc4_bo *bo = to_vc4_bo(obj); + + /* The only reason we would end up here is when user-space accesses + * BO's memory after it's been purged. + */ + mutex_lock(&bo->madv_lock); + WARN_ON(bo->madv != __VC4_MADV_PURGED); + mutex_unlock(&bo->madv_lock); + + return VM_FAULT_SIGBUS; } int vc4_mmap(struct file *filp, struct vm_area_struct *vma) { struct drm_gem_object *gem_obj; + unsigned long vm_pgoff; struct vc4_bo *bo; int ret; @@ -507,16 +754,36 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } + if (bo->madv != VC4_MADV_WILLNEED) { + DRM_DEBUG("mmaping of %s BO not allowed\n", + bo->madv == VC4_MADV_DONTNEED ? + "purgeable" : "purged"); + return -EINVAL; + } + /* * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map * the whole buffer. */ vma->vm_flags &= ~VM_PFNMAP; - vma->vm_pgoff = 0; + /* This ->vm_pgoff dance is needed to make all parties happy: + * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated + * mem-region, hence the need to set it to zero (the value set by + * the DRM core is a virtual offset encoding the GEM object-id) + * - the mmap() core logic needs ->vm_pgoff to be restored to its + * initial value before returning from this function because it + * encodes the offset of this GEM in the dev->anon_inode pseudo-file + * and this information will be used when we invalidate userspace + * mappings with drm_vma_node_unmap() (called from vc4_gem_purge()). 
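The vm_pgoff save/zero/restore described here can be read as a tiny helper; the following is a sketch extracted for clarity (hypothetical function name, same calls as the hunk):

#include <linux/dma-mapping.h>

/* Map a CMA-backed GEM BO while preserving the fake mmap offset that DRM
 * encoded in vm_pgoff; the offset is still needed later when userspace
 * mappings are invalidated with drm_vma_node_unmap().
 */
static int map_wc_preserving_pgoff(struct device *dev,
				   struct vm_area_struct *vma,
				   void *vaddr, dma_addr_t paddr)
{
	unsigned long saved = vma->vm_pgoff;
	int ret;

	vma->vm_pgoff = 0;	/* dma_mmap_wc() expects an offset inside the buffer */
	ret = dma_mmap_wc(dev, vma, vaddr, paddr, vma->vm_end - vma->vm_start);
	vma->vm_pgoff = saved;	/* restore the DRM object offset */

	return ret;
}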
+ */ + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = 0; ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr, bo->base.paddr, vma->vm_end - vma->vm_start); + vma->vm_pgoff = vm_pgoff; + if (ret) drm_gem_vm_close(vma); @@ -580,6 +847,8 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); drm_gem_object_put_unlocked(&bo->base.base); @@ -633,6 +902,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); + bo->madv = VC4_MADV_WILLNEED; + if (copy_from_user(bo->base.vaddr, (void __user *)(uintptr_t)args->data, args->size)) { diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 1c96edcb302b..e3c29729da2e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -100,6 +100,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_ETC1: case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: + case DRM_VC4_PARAM_SUPPORTS_MADVISE: args->value = true; break; default: @@ -117,6 +118,12 @@ static void vc4_lastclose(struct drm_device *dev) drm_fbdev_cma_restore_mode(vc4->fbdev); } +static const struct vm_operations_struct vc4_vm_ops = { + .fault = vc4_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + static const struct file_operations vc4_drm_fops = { .owner = THIS_MODULE, .open = drm_open, @@ -142,6 +149,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -166,7 +174,7 @@ static struct drm_driver vc4_drm_driver = { .gem_create_object = vc4_create_object, .gem_free_object_unlocked = vc4_free_object, - .gem_vm_ops = &drm_gem_cma_vm_ops, + .gem_vm_ops = &vc4_vm_ops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 87f2d8e5c134..9c0d380c96f2 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -74,6 +74,19 @@ struct vc4_dev { /* Protects bo_cache and bo_labels. */ struct mutex bo_lock; + /* Purgeable BO pool. All BOs in this pool can have their memory + * reclaimed if the driver is unable to allocate new BOs. We also + * keep stats related to the purge mechanism here. + */ + struct { + struct list_head list; + unsigned int num; + size_t size; + unsigned int purged_num; + size_t purged_size; + struct mutex lock; + } purgeable; + uint64_t dma_fence_context; /* Sequence number for the last job queued in bin_job_list. @@ -192,6 +205,16 @@ struct vc4_bo { * for user-allocated labels. */ int label; + + /* Count the number of active users. This is needed to determine + * whether we can move the BO to the purgeable list or not (when the BO + * is used by the GPU or the display engine we can't purge it). 
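On the userspace side, the DRM_VC4_PARAM_SUPPORTS_MADVISE flag advertised by vc4_get_param_ioctl() above is how clients discover the feature before relying on it. A hedged sketch, assuming libdrm and vc4 uapi headers that match this kernel:

#include <xf86drm.h>
#include <drm/vc4_drm.h>

/* Returns non-zero if the running kernel supports VC4_GEM_MADVISE. */
static int supports_madvise(int fd)
{
	struct drm_vc4_get_param p = {
		.param = DRM_VC4_PARAM_SUPPORTS_MADVISE,
	};

	if (drmIoctl(fd, DRM_IOCTL_VC4_GET_PARAM, &p))
		return 0;

	return p.value != 0;
}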
+ */ + refcount_t usecnt; + + /* Store purgeable/purged state here */ + u32 madv; + struct mutex madv_lock; }; static inline struct vc4_bo * @@ -503,6 +526,7 @@ int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_label_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_fault(struct vm_fault *vmf); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); @@ -513,6 +537,10 @@ void *vc4_prime_vmap(struct drm_gem_object *obj); int vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); +int vc4_bo_inc_usecnt(struct vc4_bo *bo); +void vc4_bo_dec_usecnt(struct vc4_bo *bo); +void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo); +void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo); /* vc4_crtc.c */ extern struct platform_driver vc4_crtc_driver; @@ -557,6 +585,8 @@ void vc4_job_handle_completed(struct vc4_dev *vc4); int vc4_queue_seqno_cb(struct drm_device *dev, struct vc4_seqno_cb *cb, uint64_t seqno, void (*func)(struct vc4_seqno_cb *cb)); +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* vc4_hdmi.c */ extern struct platform_driver vc4_hdmi_driver; diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 554605af344e..94085f8bcd68 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -1360,6 +1360,27 @@ static void dsi_handle_error(struct vc4_dsi *dsi, *ret = IRQ_HANDLED; } +/* + * Initial handler for port 1 where we need the reg_dma workaround. + * The register DMA writes sleep, so we can't do it in the top half. + * Instead we use IRQF_ONESHOT so that the IRQ gets disabled in the + * parent interrupt contrller until our interrupt thread is done. + */ +static irqreturn_t vc4_dsi_irq_defer_to_thread_handler(int irq, void *data) +{ + struct vc4_dsi *dsi = data; + u32 stat = DSI_PORT_READ(INT_STAT); + + if (!stat) + return IRQ_NONE; + + return IRQ_WAKE_THREAD; +} + +/* + * Normal IRQ handler for port 0, or the threaded IRQ handler for port + * 1 where we need the reg_dma workaround. + */ static irqreturn_t vc4_dsi_irq_handler(int irq, void *data) { struct vc4_dsi *dsi = data; @@ -1539,8 +1560,15 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) /* Clear any existing interrupt state. */ DSI_PORT_WRITE(INT_STAT, DSI_PORT_READ(INT_STAT)); - ret = devm_request_irq(dev, platform_get_irq(pdev, 0), - vc4_dsi_irq_handler, 0, "vc4 dsi", dsi); + if (dsi->reg_dma_mem) + ret = devm_request_threaded_irq(dev, platform_get_irq(pdev, 0), + vc4_dsi_irq_defer_to_thread_handler, + vc4_dsi_irq_handler, + IRQF_ONESHOT, + "vc4 dsi", dsi); + else + ret = devm_request_irq(dev, platform_get_irq(pdev, 0), + vc4_dsi_irq_handler, 0, "vc4 dsi", dsi); if (ret) { if (ret != -EPROBE_DEFER) dev_err(dev, "Failed to get interrupt: %d\n", ret); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index d0c6bfb68c4e..e00ac2f3a264 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -188,11 +188,22 @@ vc4_save_hang_state(struct drm_device *dev) continue; for (j = 0; j < exec[i]->bo_count; j++) { + bo = to_vc4_bo(&exec[i]->bo[j]->base); + + /* Retain BOs just in case they were marked purgeable. 
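The DSI hunk above moves port 1 handling to a threaded interrupt because the reg_dma register writes can sleep. The general shape of that devm_request_threaded_irq()/IRQF_ONESHOT pattern, with a hypothetical device type and register name, is:

/* Hard handler: interrupt context, must not sleep; it only decides
 * whether the line is ours and defers the sleeping work.
 */
static irqreturn_t hw_irq(int irq, void *data)
{
	struct my_dev *mdev = data;		/* hypothetical driver state */

	if (!readl(mdev->regs + MY_INT_STAT))	/* hypothetical status register */
		return IRQ_NONE;

	return IRQ_WAKE_THREAD;
}

/* Threaded handler: process context, may sleep (e.g. DMA-backed writes). */
static irqreturn_t hw_irq_thread(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int hw_request_irq(struct device *dev, int irq, struct my_dev *mdev)
{
	/* IRQF_ONESHOT keeps the line masked until hw_irq_thread() returns. */
	return devm_request_threaded_irq(dev, irq, hw_irq, hw_irq_thread,
					 IRQF_ONESHOT, "my-dev", mdev);
}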
+ * This prevents the BO from being purged before + * someone had a chance to dump the hang state. + */ + WARN_ON(!refcount_read(&bo->usecnt)); + refcount_inc(&bo->usecnt); drm_gem_object_get(&exec[i]->bo[j]->base); kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; } list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { + /* No need to retain BOs coming from the ->unref_list + * because they are naturally unpurgeable. + */ drm_gem_object_get(&bo->base.base); kernel_state->bo[j + prev_idx] = &bo->base.base; j++; @@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *dev) state->fdbgs = V3D_READ(V3D_FDBGS); state->errstat = V3D_READ(V3D_ERRSTAT); + /* We need to turn purgeable BOs into unpurgeable ones so that + * userspace has a chance to dump the hang state before the kernel + * decides to purge those BOs. + * Note that BO consistency at dump time cannot be guaranteed. For + * example, if the owner of these BOs decides to re-use them or mark + * them purgeable again there's nothing we can do to prevent it. + */ + for (i = 0; i < kernel_state->user_state.bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]); + + if (bo->madv == __VC4_MADV_NOTSUPP) + continue; + + mutex_lock(&bo->madv_lock); + if (!WARN_ON(bo->madv == __VC4_MADV_PURGED)) + bo->madv = VC4_MADV_WILLNEED; + refcount_dec(&bo->usecnt); + mutex_unlock(&bo->madv_lock); + } + spin_lock_irqsave(&vc4->job_lock, irqflags); if (vc4->hang_state) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); @@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, * The command validator needs to reference BOs by their index within * the submitted job's BO list. This does the validation of the job's * BO list and reference counting for the lifetime of the job. - * - * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at vc4_complete_exec() time. */ static int vc4_cl_lookup_bos(struct drm_device *dev, @@ -693,16 +721,47 @@ vc4_cl_lookup_bos(struct drm_device *dev, DRM_DEBUG("Failed to look up GEM BO %d: %d\n", i, handles[i]); ret = -EINVAL; - spin_unlock(&file_priv->table_lock); - goto fail; + break; } + drm_gem_object_get(bo); exec->bo[i] = (struct drm_gem_cma_object *)bo; } spin_unlock(&file_priv->table_lock); + if (ret) + goto fail_put_bo; + + for (i = 0; i < exec->bo_count; i++) { + ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base)); + if (ret) + goto fail_dec_usecnt; + } + + kvfree(handles); + return 0; + +fail_dec_usecnt: + /* Decrease usecnt on acquired objects. + * We cannot rely on vc4_complete_exec() to release resources here, + * because vc4_complete_exec() has no information about which BO has + * had its ->usecnt incremented. + * To make things easier we just free everything explicitly and set + * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release' + * step. + */ + for (i-- ; i >= 0; i--) + vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base)); + +fail_put_bo: + /* Release any reference to acquired objects. 
*/ + for (i = 0; i < exec->bo_count && exec->bo[i]; i++) + drm_gem_object_put_unlocked(&exec->bo[i]->base); + fail: kvfree(handles); + kvfree(exec->bo); + exec->bo = NULL; return ret; } @@ -833,8 +892,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) dma_fence_signal(exec->fence); if (exec->bo) { - for (i = 0; i < exec->bo_count; i++) + for (i = 0; i < exec->bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); + + vc4_bo_dec_usecnt(bo); drm_gem_object_put_unlocked(&exec->bo[i]->base); + } kvfree(exec->bo); } @@ -1098,6 +1161,9 @@ vc4_gem_init(struct drm_device *dev) INIT_WORK(&vc4->job_done_work, vc4_job_done_work); mutex_init(&vc4->power_lock); + + INIT_LIST_HEAD(&vc4->purgeable.list); + mutex_init(&vc4->purgeable.lock); } void @@ -1121,3 +1187,81 @@ vc4_gem_destroy(struct drm_device *dev) if (vc4->hang_state) vc4_free_hang_state(dev, vc4->hang_state); } + +int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_gem_madvise *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + int ret; + + switch (args->madv) { + case VC4_MADV_DONTNEED: + case VC4_MADV_WILLNEED: + break; + default: + return -EINVAL; + } + + if (args->pad != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_vc4_bo(gem_obj); + + /* Only BOs exposed to userspace can be purged. */ + if (bo->madv == __VC4_MADV_NOTSUPP) { + DRM_DEBUG("madvise not supported on this BO\n"); + ret = -EINVAL; + goto out_put_gem; + } + + /* Not sure it's safe to purge imported BOs. Let's just assume it's + * not until proven otherwise. + */ + if (gem_obj->import_attach) { + DRM_DEBUG("madvise not supported on imported BOs\n"); + ret = -EINVAL; + goto out_put_gem; + } + + mutex_lock(&bo->madv_lock); + + if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED && + !refcount_read(&bo->usecnt)) { + /* If the BO is about to be marked as purgeable, is not used + * and is not already purgeable or purged, add it to the + * purgeable list. + */ + vc4_bo_add_to_purgeable_pool(bo); + } else if (args->madv == VC4_MADV_WILLNEED && + bo->madv == VC4_MADV_DONTNEED && + !refcount_read(&bo->usecnt)) { + /* The BO has not been purged yet, just remove it from + * the purgeable list. + */ + vc4_bo_remove_from_purgeable_pool(bo); + } + + /* Save the purged state. */ + args->retained = bo->madv != __VC4_MADV_PURGED; + + /* Update internal madv state only if the bo was not purged. 
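From userspace, the new ioctl is used in two directions: mark a BO DONTNEED while it is idle, and mark it WILLNEED again before reuse, checking 'retained' to see whether the contents survived. A usage sketch, assuming libdrm plus vc4 uapi headers that match this kernel:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/vc4_drm.h>

/* Tell the kernel the BO contents may be discarded under memory pressure. */
static int bo_mark_dontneed(int fd, uint32_t handle)
{
	struct drm_vc4_gem_madvise arg = {
		.handle = handle,
		.madv = VC4_MADV_DONTNEED,
	};

	return drmIoctl(fd, DRM_IOCTL_VC4_GEM_MADVISE, &arg);
}

/* Reclaim the BO for use. Returns 1 if the old contents survived,
 * 0 if the BO was purged and must be re-initialized, -1 on error.
 */
static int bo_mark_willneed(int fd, uint32_t handle)
{
	struct drm_vc4_gem_madvise arg = {
		.handle = handle,
		.madv = VC4_MADV_WILLNEED,
	};

	if (drmIoctl(fd, DRM_IOCTL_VC4_GEM_MADVISE, &arg))
		return -1;

	return arg.retained;
}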
*/ + if (bo->madv != __VC4_MADV_PURGED) + bo->madv = args->madv; + + mutex_unlock(&bo->madv_lock); + + ret = 0; + +out_put_gem: + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 3a767a038f72..423a23ed8fc2 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -23,6 +23,7 @@ #include <drm/drm_fb_cma_helper.h> #include <drm/drm_plane_helper.h> +#include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" #include "vc4_regs.h" @@ -774,21 +775,40 @@ static int vc4_prepare_fb(struct drm_plane *plane, { struct vc4_bo *bo; struct dma_fence *fence; + int ret; if ((plane->state->fb == state->fb) || !state->fb) return 0; bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); + + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + fence = reservation_object_get_excl_rcu(bo->resv); drm_atomic_set_fence_for_plane(state, fence); return 0; } +static void vc4_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_bo *bo; + + if (plane->state->fb == state->fb || !state->fb) + return; + + bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); + vc4_bo_dec_usecnt(bo); +} + static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_check = vc4_plane_atomic_check, .atomic_update = vc4_plane_atomic_update, .prepare_fb = vc4_prepare_fb, + .cleanup_fb = vc4_cleanup_fb, }; static void vc4_plane_destroy(struct drm_plane *plane) diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c index 98aae9809249..32c9938e1e1e 100644 --- a/drivers/gpu/drm/via/via_dmablit.c +++ b/drivers/gpu/drm/via/via_dmablit.c @@ -238,9 +238,9 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) vsg->pages = vzalloc(sizeof(struct page *) * vsg->num_pages); if (NULL == vsg->pages) return -ENOMEM; - ret = get_user_pages_unlocked((unsigned long)xfer->mem_addr, - vsg->num_pages, vsg->pages, - (vsg->direction == DMA_FROM_DEVICE) ? FOLL_WRITE : 0); + ret = get_user_pages_fast((unsigned long)xfer->mem_addr, + vsg->num_pages, vsg->direction == DMA_FROM_DEVICE, + vsg->pages); if (ret != vsg->num_pages) { if (ret < 0) return ret; diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile index 7684f613bdc3..f29deec83d1f 100644 --- a/drivers/gpu/drm/virtio/Makefile +++ b/drivers/gpu/drm/virtio/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
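The via_dmablit change above switches to get_user_pages_fast(); with that call the caller is responsible for unwinding a partial pin itself. A sketch of that pin/cleanup balance, using the 4-argument signature of this kernel era (hypothetical helper name):

#include <linux/mm.h>

static int pin_user_buffer(unsigned long uaddr, int nr, int write,
			   struct page **pages)
{
	int got = get_user_pages_fast(uaddr, nr, write, pages);
	int i;

	if (got == nr)
		return 0;
	if (got < 0)
		return got;

	/* Partial pin: release the pages we did get and report failure. */
	for (i = 0; i < got; i++)
		put_page(pages[i]);

	return -EFAULT;
}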
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index a365330bbb82..ad80211e1098 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \ vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \ diff --git a/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h b/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h index 120eab830eaf..3a195e8106b3 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h +++ b/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _VM_BASIC_TYPES_H_ #define _VM_BASIC_TYPES_H_ #include <linux/kernel.h> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index e84fee3ec4f3..184340d486c3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -721,7 +721,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) * allocation taken by fbdev */ if (!(dev_priv->capabilities & SVGA_CAP_3D)) - mem_size *= 2; + mem_size *= 3; dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; dev_priv->prim_bb_mem = diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 3bbad22b3748..d6b1c509ae01 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -224,7 +224,7 @@ out: return ret; } -static struct dma_fence_ops vmw_fence_ops = { +static const struct dma_fence_ops vmw_fence_ops = { .get_driver_name = vmw_fence_get_driver_name, .get_timeline_name = vmw_fence_get_timeline_name, .enable_signaling = vmw_fence_enable_signaling, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index d1552d3e0652..bc5f6026573d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -360,8 +360,8 @@ static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc, ret = vmw_event_fence_action_queue(file_priv, fence, &event->base, - &event->event.tv_sec, - &event->event.tv_usec, + &event->event.vbl.tv_sec, + &event->event.vbl.tv_usec, true); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index ca3afae2db1f..90b5437fd787 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -549,8 +549,8 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc, ret = vmw_event_fence_action_queue(file_priv, fence, &event->base, - &event->event.tv_sec, - &event->event.tv_usec, + &event->event.vbl.tv_sec, + &event->event.vbl.tv_usec, true); vmw_fence_obj_unreference(&fence); } else { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index a552e4ea5440..6ac094ee8983 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -904,7 +904,7 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, if (unlikely(drm_is_render_client(file_priv))) require_exist = true; - if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) { + if (READ_ONCE(vmw_fpriv(file_priv)->locked_master)) { DRM_ERROR("Locked master refused legacy " "surface reference.\n"); return -EACCES; diff --git a/drivers/gpu/drm/zte/Makefile b/drivers/gpu/drm/zte/Makefile index 9df7766a7f9d..b6d966d849dd 100644 --- 
a/drivers/gpu/drm/zte/Makefile +++ b/drivers/gpu/drm/zte/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 zxdrm-y := \ zx_drm_drv.o \ zx_hdmi.o \
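Finally, the vmwgfx_surface.c hunk above swaps ACCESS_ONCE() for READ_ONCE(), the marked-access idiom that replaced it (ACCESS_ONCE only worked on scalar types and was removed). The idiom in isolation, with a hypothetical structure:

#include <linux/compiler.h>

struct ctx {
	struct master *locked_master;	/* written under a lock elsewhere */
};

/* READ_ONCE() forces a single, non-torn load of a shared pointer that may
 * change concurrently, without requiring the reader to take the lock.
 */
static bool has_locked_master(struct ctx *c)
{
	return READ_ONCE(c->locked_master) != NULL;
}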