Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
128 files changed, 13345 insertions, 7540 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 61360e27715f..26682454a446 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -5,15 +5,23 @@ config DRM_AMDGPU_SI Choose this option if you want to enable experimental support for SI asics. + SI is already supported in radeon. Experimental support for SI + in amdgpu will be disabled by default and is still provided by + radeon. Use module options to override this: + + radeon.si_support=0 amdgpu.si_support=1 + config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" depends on DRM_AMDGPU help - Choose this option if you want to enable experimental support - for CIK asics. + Choose this option if you want to enable support for CIK asics. + + CIK is already supported in radeon. Support for CIK in amdgpu + will be disabled by default and is still provided by radeon. + Use module options to override this: - CIK is already supported in radeon. CIK support in amdgpu - is for experimentation and testing. + radeon.cik_support=0 amdgpu.cik_support=1 config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 660786aba7d2..658bac0cdc5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -4,7 +4,7 @@ FULL_AMD_PATH=$(src)/.. -ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \ +ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ -I$(FULL_AMD_PATH)/amdgpu \ -I$(FULL_AMD_PATH)/scheduler \ @@ -24,7 +24,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o + amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ + amdgpu_queue_mgr.o amdgpu_vf_error.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -34,7 +35,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o # add GMC block amdgpu-y += \ @@ -54,7 +55,8 @@ amdgpu-y += \ # add PSP block amdgpu-y += \ amdgpu_psp.o \ - psp_v3_1.o + psp_v3_1.o \ + psp_v10_0.o # add SMC block amdgpu-y += \ @@ -92,6 +94,11 @@ amdgpu-y += \ vce_v3_0.o \ vce_v4_0.o +# add VCN block +amdgpu-y += \ + amdgpu_vcn.o \ + vcn_v1_0.o + # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 833c3c16501a..103635ab784c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -36,16 +36,18 @@ #include <linux/hashtable.h> #include <linux/dma-fence.h> -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_execbuf_util.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_execbuf_util.h> #include <drm/drmP.h> #include <drm/drm_gem.h> #include <drm/amdgpu_drm.h> +#include <kgd_kfd_interface.h> + #include 
"amd_shared.h" #include "amdgpu_mode.h" #include "amdgpu_ih.h" @@ -62,16 +64,20 @@ #include "amdgpu_acp.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" +#include "amdgpu_vcn.h" #include "gpu_scheduler.h" #include "amdgpu_virt.h" +#include "amdgpu_gart.h" /* * Modules parameters. */ extern int amdgpu_modeset; extern int amdgpu_vram_limit; +extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; +extern int amdgpu_gtt_size; extern int amdgpu_moverate; extern int amdgpu_benchmarking; extern int amdgpu_testing; @@ -90,8 +96,10 @@ extern int amdgpu_bapm; extern int amdgpu_deep_color; extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; +extern int amdgpu_vm_fragment_size; extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; +extern int amdgpu_vm_update_mode; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; extern int amdgpu_no_evict; @@ -100,6 +108,7 @@ extern unsigned amdgpu_pcie_gen_cap; extern unsigned amdgpu_pcie_lane_cap; extern unsigned amdgpu_cg_mask; extern unsigned amdgpu_pg_mask; +extern unsigned amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern unsigned amdgpu_pp_feature_mask; @@ -109,6 +118,15 @@ extern int amdgpu_prim_buf_per_se; extern int amdgpu_pos_buf_per_se; extern int amdgpu_cntl_sb_buf_per_se; extern int amdgpu_param_buf_per_se; +extern int amdgpu_job_hang_limit; +extern int amdgpu_lbpw; + +#ifdef CONFIG_DRM_AMDGPU_SI +extern int amdgpu_si_support; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK +extern int amdgpu_cik_support; +#endif #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -305,8 +323,8 @@ struct amdgpu_gart_funcs { /* set pte flags based per asic */ uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, uint32_t flags); - /* adjust mc addr in fb for APU case */ - u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr); + /* get the pde for a given mc addr */ + u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr); uint32_t (*get_invalidate_req)(unsigned int vm_id); }; @@ -356,78 +374,10 @@ struct amdgpu_clock { }; /* - * BO. + * GEM. 
*/ -struct amdgpu_bo_list_entry { - struct amdgpu_bo *robj; - struct ttm_validate_buffer tv; - struct amdgpu_bo_va *bo_va; - uint32_t priority; - struct page **user_pages; - int user_invalidated; -}; - -struct amdgpu_bo_va_mapping { - struct list_head list; - struct rb_node rb; - uint64_t start; - uint64_t last; - uint64_t __subtree_last; - uint64_t offset; - uint64_t flags; -}; - -/* bo virtual addresses in a specific vm */ -struct amdgpu_bo_va { - /* protected by bo being reserved */ - struct list_head bo_list; - struct dma_fence *last_pt_update; - unsigned ref_count; - - /* protected by vm mutex and spinlock */ - struct list_head vm_status; - - /* mappings for this bo_va */ - struct list_head invalids; - struct list_head valids; - - /* constant after initialization */ - struct amdgpu_vm *vm; - struct amdgpu_bo *bo; -}; #define AMDGPU_GEM_DOMAIN_MAX 0x3 - -struct amdgpu_bo { - /* Protected by tbo.reserved */ - u32 prefered_domains; - u32 allowed_domains; - struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; - struct ttm_placement placement; - struct ttm_buffer_object tbo; - struct ttm_bo_kmap_obj kmap; - u64 flags; - unsigned pin_count; - void *kptr; - u64 tiling_flags; - u64 metadata_flags; - void *metadata; - u32 metadata_size; - unsigned prime_shared_count; - /* list of all virtual address to which this bo - * is associated to - */ - struct list_head va; - /* Constant after initialization */ - struct drm_gem_object gem_base; - struct amdgpu_bo *parent; - struct amdgpu_bo *shadow; - - struct ttm_bo_kmap_obj dma_buf_vmap; - struct amdgpu_mn *mn; - struct list_head mn_list; - struct list_head shadow_list; -}; #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) void amdgpu_gem_object_free(struct drm_gem_object *obj); @@ -519,49 +469,6 @@ int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); /* - * GART structures, functions & helpers - */ -struct amdgpu_mc; - -#define AMDGPU_GPU_PAGE_SIZE 4096 -#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1) -#define AMDGPU_GPU_PAGE_SHIFT 12 -#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK) - -struct amdgpu_gart { - dma_addr_t table_addr; - struct amdgpu_bo *robj; - void *ptr; - unsigned num_gpu_pages; - unsigned num_cpu_pages; - unsigned table_size; -#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS - struct page **pages; -#endif - bool ready; - - /* Asic default pte flags */ - uint64_t gart_pte_flags; - - const struct amdgpu_gart_funcs *gart_funcs; -}; - -int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); -void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); -int amdgpu_gart_init(struct amdgpu_device *adev); -void amdgpu_gart_fini(struct amdgpu_device *adev); -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, - int pages); -int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, - int pages, struct page **pagelist, - dma_addr_t *dma_addr, uint64_t flags); -int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); - -/* * VMHUB structures, functions & helpers */ struct amdgpu_vmhub { @@ -585,23 +492,22 @@ struct amdgpu_mc { * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; - u64 gtt_size; - u64 gtt_start; - u64 gtt_end; + u64 gart_size; + u64 
gart_start; + u64 gart_end; u64 vram_start; u64 vram_end; unsigned vram_width; u64 real_vram_size; int vram_mtrr; - u64 gtt_base_align; u64 mc_mask; const struct firmware *fw; /* MC firmware */ uint32_t fw_version; struct amdgpu_irq_src vm_fault; uint32_t vram_type; uint32_t srbm_soft_reset; - struct amdgpu_mode_mc_save save; bool prt_warning; + uint64_t stolen_size; /* apertures */ u64 shared_aperture_start; u64 shared_aperture_end; @@ -705,15 +611,15 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT /* overlap the doorbell assignment with VCN as they are mutually exclusive * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD */ - AMDGPU_DOORBELL64_RING0_1 = 0xF8, - AMDGPU_DOORBELL64_RING2_3 = 0xF9, - AMDGPU_DOORBELL64_RING4_5 = 0xFA, - AMDGPU_DOORBELL64_RING6_7 = 0xFB, + AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8, + AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9, + AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA, + AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB, - AMDGPU_DOORBELL64_UVD_RING0_1 = 0xFC, - AMDGPU_DOORBELL64_UVD_RING2_3 = 0xFD, - AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFE, - AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFF, + AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC, + AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD, + AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, + AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, AMDGPU_DOORBELL64_INVALID = 0xFFFF @@ -772,6 +678,29 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **f); /* + * Queue manager + */ +struct amdgpu_queue_mapper { + int hw_ip; + struct mutex lock; + /* protected by lock */ + struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS]; +}; + +struct amdgpu_queue_mgr { + struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM]; +}; + +int amdgpu_queue_mgr_init(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr); +int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr); +int amdgpu_queue_mgr_map(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr, + int hw_ip, int instance, int ring, + struct amdgpu_ring **out_ring); + +/* * context related structures */ @@ -784,6 +713,7 @@ struct amdgpu_ctx_ring { struct amdgpu_ctx { struct kref refcount; struct amdgpu_device *adev; + struct amdgpu_queue_mgr queue_mgr; unsigned reset_counter; spinlock_t ring_lock; struct dma_fence **fences; @@ -819,17 +749,29 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); struct amdgpu_fpriv { struct amdgpu_vm vm; struct amdgpu_bo_va *prt_va; + struct amdgpu_bo_va *csa_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + u32 vram_lost_counter; }; /* * residency list */ +struct amdgpu_bo_list_entry { + struct amdgpu_bo *robj; + struct ttm_validate_buffer tv; + struct amdgpu_bo_va *bo_va; + uint32_t priority; + struct page **user_pages; + int user_invalidated; +}; struct amdgpu_bo_list { struct mutex lock; + struct rcu_head rhead; + struct kref refcount; struct amdgpu_bo *gds_obj; struct amdgpu_bo *gws_obj; struct amdgpu_bo *oa_obj; @@ -893,20 +835,26 @@ struct amdgpu_rlc { u32 *register_restore; }; +#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; struct amdgpu_bo *mec_fw_obj; u64 mec_fw_gpu_addr; - u32 num_pipe; u32 num_mec; - u32 num_queue; + u32 num_pipe_per_mec; + u32 num_queue_per_pipe; void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; + + /* These are the resources for which amdgpu takes ownership */ + DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; struct amdgpu_kiq { u64 
eop_gpu_addr; struct amdgpu_bo *eop_obj; + struct mutex ring_mutex; struct amdgpu_ring ring; struct amdgpu_irq_src irq; }; @@ -981,9 +929,15 @@ struct amdgpu_gfx_config { }; struct amdgpu_cu_info { - uint32_t number; /* total active CU number */ - uint32_t ao_cu_mask; + uint32_t max_waves_per_simd; uint32_t wave_front_size; + uint32_t max_scratch_slots_per_cu; + uint32_t lds_size; + + /* total active CU number */ + uint32_t number; + uint32_t ao_cu_mask; + uint32_t ao_cu_bitmap[4][4]; uint32_t bitmap[4][4]; }; @@ -1061,6 +1015,8 @@ struct amdgpu_gfx { uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; bool in_reset; + /* s3/s4 mask */ + bool in_suspend; /* NGG */ struct amdgpu_ngg ngg; }; @@ -1104,17 +1060,21 @@ struct amdgpu_cs_parser { struct list_head validated; struct dma_fence *fence; uint64_t bytes_moved_threshold; + uint64_t bytes_moved_vis_threshold; uint64_t bytes_moved; + uint64_t bytes_moved_vis; struct amdgpu_bo_list_entry *evictable; /* user fence */ struct amdgpu_bo_list_entry uf_entry; + + unsigned num_post_dep_syncobjs; + struct drm_syncobj **post_dep_syncobjs; }; #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ -#define AMDGPU_VM_DOMAIN (1 << 3) /* bit set means in virtual memory context */ struct amdgpu_job { struct amd_sched_job base; @@ -1122,6 +1082,8 @@ struct amdgpu_job { struct amdgpu_vm *vm; struct amdgpu_ring *ring; struct amdgpu_sync sync; + struct amdgpu_sync dep_sync; + struct amdgpu_sync sched_sync; struct amdgpu_ib *ibs; struct dma_fence *fence; /* the hw fence */ uint32_t preamble_status; @@ -1129,7 +1091,6 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; - bool need_pipeline_sync; unsigned vm_id; uint64_t vm_pd_addr; uint32_t gds_base, gds_size; @@ -1172,8 +1133,6 @@ struct amdgpu_wb { int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb); void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb); -int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb); -void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb); void amdgpu_get_pcie_info(struct amdgpu_device *adev); @@ -1221,6 +1180,9 @@ struct amdgpu_firmware { const struct amdgpu_psp_funcs *funcs; struct amdgpu_bo *rbuf; struct mutex mutex; + + /* gpu info firmware data pointer */ + const struct firmware *gpu_info_fw; }; /* @@ -1296,7 +1258,6 @@ struct amdgpu_smumgr { */ struct amdgpu_allowed_register_entry { uint32_t reg_offset; - bool untouched; bool grbm_indexed; }; @@ -1424,6 +1385,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); +#define AMDGPU_RESET_MAGIC_NUM 64 struct amdgpu_device { struct device *dev; struct drm_device *ddev; @@ -1464,7 +1426,7 @@ struct amdgpu_device { bool is_atom_fw; uint8_t *bios; uint32_t bios_size; - struct amdgpu_bo *stollen_vga_memory; + struct amdgpu_bo *stolen_vga_memory; uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; @@ -1496,6 +1458,10 @@ struct amdgpu_device { spinlock_t gc_cac_idx_lock; amdgpu_rreg_t gc_cac_rreg; amdgpu_wreg_t gc_cac_wreg; + /* protects concurrent se_cac register access */ + spinlock_t 
se_cac_idx_lock; + amdgpu_rreg_t se_cac_rreg; + amdgpu_wreg_t se_cac_wreg; /* protects concurrent ENDPOINT (audio) register access */ spinlock_t audio_endpt_idx_lock; amdgpu_block_rreg_t audio_endpt_rreg; @@ -1518,18 +1484,18 @@ struct amdgpu_device { struct amdgpu_mman mman; struct amdgpu_vram_scratch vram_scratch; struct amdgpu_wb wb; - atomic64_t vram_usage; - atomic64_t vram_vis_usage; - atomic64_t gtt_usage; atomic64_t num_bytes_moved; atomic64_t num_evictions; + atomic64_t num_vram_cpu_page_faults; atomic_t gpu_reset_counter; + atomic_t vram_lost_counter; /* data for buffer migration throttling */ struct { spinlock_t lock; s64 last_update_us; s64 accum_us; /* accumulated microseconds */ + s64 accum_us_vis; /* for visible VRAM */ u32 log2_max_MBps; } mm_stats; @@ -1570,11 +1536,18 @@ struct amdgpu_device { /* sdma */ struct amdgpu_sdma sdma; - /* uvd */ - struct amdgpu_uvd uvd; + union { + struct { + /* uvd */ + struct amdgpu_uvd uvd; + + /* vce */ + struct amdgpu_vce vce; + }; - /* vce */ - struct amdgpu_vce vce; + /* vcn */ + struct amdgpu_vcn vcn; + }; /* firmwares */ struct amdgpu_firmware firmware; @@ -1598,6 +1571,9 @@ struct amdgpu_device { /* amdkfd interface */ struct kfd_dev *kfd; + /* delayed work_func for deferring clockgating during resume */ + struct delayed_work late_init_work; + struct amdgpu_virt virt; /* link all shadow bo */ @@ -1606,10 +1582,16 @@ struct amdgpu_device { /* link all gtt */ spinlock_t gtt_list_lock; struct list_head gtt_list; + /* keep an lru list of rings by HW IP */ + struct list_head ring_lru_list; + spinlock_t ring_lru_list_lock; /* record hw reset is performed */ bool has_hw_reset; + u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; + /* record last mm index being written through WREG32*/ + unsigned long last_mm_index; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1617,7 +1599,6 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) return container_of(bdev, struct amdgpu_device, mman.bdev); } -bool amdgpu_device_is_px(struct drm_device *dev); int amdgpu_device_init(struct amdgpu_device *adev, struct drm_device *ddev, struct pci_dev *pdev, @@ -1666,6 +1647,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v)) #define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg)) #define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v)) +#define RREG32_SE_CAC(reg) adev->se_cac_rreg(adev, (reg)) +#define WREG32_SE_CAC(reg, v) adev->se_cac_wreg(adev, (reg), (v)) #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg)) #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v)) #define WREG32_P(reg, val, mask) \ @@ -1716,49 +1699,6 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8)) #define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16)) -/* - * RING helpers. 
- */ -static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) -{ - if (ring->count_dw <= 0) - DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - ring->ring[ring->wptr++ & ring->buf_mask] = v; - ring->wptr &= ring->ptr_mask; - ring->count_dw--; -} - -static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw) -{ - unsigned occupied, chunk1, chunk2; - void *dst; - - if (ring->count_dw < count_dw) { - DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - } else { - occupied = ring->wptr & ring->buf_mask; - dst = (void *)&ring->ring[occupied]; - chunk1 = ring->buf_mask + 1 - occupied; - chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; - chunk2 = count_dw - chunk1; - chunk1 <<= 2; - chunk2 <<= 2; - - if (chunk1) - memcpy(dst, src, chunk1); - - if (chunk2) { - src += chunk1; - dst = (void *)ring->ring; - memcpy(dst, src, chunk2); - } - - ring->wptr += count_dw; - ring->wptr &= ring->ptr_mask; - ring->count_dw -= count_dw; - } -} - static inline struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) { @@ -1792,6 +1732,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) +#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) @@ -1813,13 +1754,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) +#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) -#define amdgpu_display_set_vga_render_state(adev, r) (adev)->mode_info.funcs->set_vga_render_state((adev), (r)) #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) #define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc)) #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) @@ -1832,8 +1773,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos)) #define 
amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) -#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) -#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) @@ -1848,11 +1787,8 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_need_post(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev); -int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); -int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, - u32 ip_instance, u32 ring, - struct amdgpu_ring **out_ring); -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes); +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); @@ -1868,7 +1804,7 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base); -void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); +void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size); int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); @@ -1900,6 +1836,8 @@ static inline bool amdgpu_has_atpx(void) { return false; } extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; extern const int amdgpu_max_kms_ioctl; +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv); int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); void amdgpu_driver_unload_kms(struct drm_device *dev); void amdgpu_driver_lastclose_kms(struct drm_device *dev); @@ -1912,10 +1850,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe); void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); -int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags); long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 06879d1dcabd..a52795d9b458 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -285,19 +285,20 @@ static int acp_hw_init(void *handle) return 0; else if (r) return r; + if (adev->asic_type != CHIP_STONEY) { + 
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); + if (adev->acp.acp_genpd == NULL) + return -ENOMEM; - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) - return -ENOMEM; - - adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; - adev->acp.acp_genpd->gpd.power_off = acp_poweroff; - adev->acp.acp_genpd->gpd.power_on = acp_poweron; + adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; + adev->acp.acp_genpd->gpd.power_off = acp_poweroff; + adev->acp.acp_genpd->gpd.power_on = acp_poweron; - adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; + adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; - pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + } adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS, GFP_KERNEL); @@ -319,14 +320,29 @@ static int acp_hw_init(void *handle) return -ENOMEM; } - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } i2s_pdata[0].cap = DWC_I2S_PLAY; i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; i2s_pdata[1].cap = DWC_I2S_RECORD; i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; @@ -373,12 +389,14 @@ static int acp_hw_init(void *handle) if (r) return r; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); - if (r) { - dev_err(dev, "Failed to add dev to genpd\n"); - return r; + if (adev->asic_type != CHIP_STONEY) { + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); + if (r) { + dev_err(dev, "Failed to add dev to genpd\n"); + return r; + } } } @@ -398,20 +416,22 @@ static int acp_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* return early if no ACP */ - if (!adev->acp.acp_genpd) + if (!adev->acp.acp_cell) return 0; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); - /* If removal fails, dont giveup and try rest */ - if (ret) - dev_err(dev, "remove dev from genpd failed\n"); + if (adev->acp.acp_genpd) { + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); + /* If removal fails, dont giveup and try rest */ + if (ret) + dev_err(dev, "remove dev from genpd failed\n"); + } + kfree(adev->acp.acp_genpd); } mfd_remove_devices(adev->acp.parent); kfree(adev->acp.acp_res); - kfree(adev->acp.acp_genpd); kfree(adev->acp.acp_cell); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 
ef79551b4cb7..57afad79f55d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -30,10 +30,10 @@ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include "amdgpu.h" +#include "amdgpu_pm.h" #include "amd_acpi.h" #include "atom.h" -extern void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); /* Call the ATIF method */ /** @@ -289,7 +289,7 @@ out: * handles it. * Returns NOTIFY code */ -int amdgpu_atif_handler(struct amdgpu_device *adev, +static int amdgpu_atif_handler(struct amdgpu_device *adev, struct acpi_bus_event *event) { struct amdgpu_atif *atif = &adev->atif; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index dba8a5b25e66..5432af39a674 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -24,18 +24,18 @@ #include "amd_shared.h" #include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_gfx.h" #include <linux/module.h> -const struct kfd2kgd_calls *kfd2kgd; const struct kgd2kfd_calls *kgd2kfd; -bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); +bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); int amdgpu_amdkfd_init(void) { int ret; #if defined(CONFIG_HSA_AMD_MODULE) - int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); kgd2kfd_init_p = symbol_request(kgd2kfd_init); @@ -60,9 +60,22 @@ int amdgpu_amdkfd_init(void) return ret; } -bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev) +void amdgpu_amdkfd_fini(void) { - switch (rdev->asic_type) { + if (kgd2kfd) { + kgd2kfd->exit(); + symbol_put(kgd2kfd_init); + } +} + +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) +{ + const struct kfd2kgd_calls *kfd2kgd; + + if (!kgd2kfd) + return; + + switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); @@ -72,73 +85,83 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev) kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: - return false; - } - - return true; -} - -void amdgpu_amdkfd_fini(void) -{ - if (kgd2kfd) { - kgd2kfd->exit(); - symbol_put(kgd2kfd_init); + dev_info(adev->dev, "kfd not supported on this ASIC\n"); + return; } -} -void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev) -{ - if (kgd2kfd) - rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, - rdev->pdev, kfd2kgd); + adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); } -void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { - if (rdev->kfd) { + int i; + int last_valid_bit; + if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - - .first_compute_pipe = 1, - .compute_pipe_count = 4 - 1, + .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe }; - amdgpu_doorbell_get_kfd_info(rdev, + /* this is going to have a few of the MSBs set that we need to + * clear */ + bitmap_complement(gpu_resources.queue_bitmap, + adev->gfx.mec.queue_bitmap, + KGD_MAX_QUEUES); + + /* remove the KIQ bit as well */ + if (adev->gfx.kiq.ring.ready) + clear_bit(amdgpu_gfx_queue_to_bit(adev, + adev->gfx.kiq.ring.me - 1, + adev->gfx.kiq.ring.pipe, + adev->gfx.kiq.ring.queue), + gpu_resources.queue_bitmap); + + /* According to linux/bitmap.h we shouldn't use bitmap_clear if + * nbits 
is not compile time constant */ + last_valid_bit = 1 /* only first MEC can have compute queues */ + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) + clear_bit(i, gpu_resources.queue_bitmap); + + amdgpu_doorbell_get_kfd_info(adev, &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); - kgd2kfd->device_init(rdev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd, &gpu_resources); } } -void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { - if (rdev->kfd) { - kgd2kfd->device_exit(rdev->kfd); - rdev->kfd = NULL; + if (adev->kfd) { + kgd2kfd->device_exit(adev->kfd); + adev->kfd = NULL; } } -void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, +void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry) { - if (rdev->kfd) - kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); + if (adev->kfd) + kgd2kfd->interrupt(adev->kfd, ih_ring_entry); } -void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev) +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { - if (rdev->kfd) - kgd2kfd->suspend(rdev->kfd); + if (adev->kfd) + kgd2kfd->suspend(adev->kfd); } -int amdgpu_amdkfd_resume(struct amdgpu_device *rdev) +int amdgpu_amdkfd_resume(struct amdgpu_device *adev) { int r = 0; - if (rdev->kfd) - r = kgd2kfd->resume(rdev->kfd); + if (adev->kfd) + r = kgd2kfd->resume(adev->kfd); return r; } @@ -147,7 +170,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct kgd_mem **mem = (struct kgd_mem **) mem_obj; int r; @@ -159,10 +182,11 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, if ((*mem) == NULL) return -ENOMEM; - r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, + &(*mem)->bo); if (r) { - dev_err(rdev->dev, + dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); return r; } @@ -170,21 +194,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, /* map the buffer */ r = amdgpu_bo_reserve((*mem)->bo, true); if (r) { - dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); + dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, &(*mem)->gpu_addr); if (r) { - dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); + dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } *gpu_addr = (*mem)->gpu_addr; r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); if (r) { - dev_err(rdev->dev, + dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } @@ -220,27 +244,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) uint64_t get_vmem_size(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; BUG_ON(kgd == NULL); - return rdev->mc.real_vram_size; + return adev->mc.real_vram_size; } uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device 
*adev = (struct amdgpu_device *)kgd; - if (rdev->gfx.funcs->get_gpu_clock_counter) - return rdev->gfx.funcs->get_gpu_clock_counter(rdev); + if (adev->gfx.funcs->get_gpu_clock_counter) + return adev->gfx.funcs->get_gpu_clock_counter(adev); return 0; } uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* The sclk is in quantas of 10kHz */ - return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; + return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index de530f68d4e3..8d689ab7e429 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -26,6 +26,7 @@ #define AMDGPU_AMDKFD_H_INCLUDED #include <linux/types.h> +#include <linux/mmu_context.h> #include <kgd_kfd_interface.h> struct amdgpu_device; @@ -39,15 +40,13 @@ struct kgd_mem { int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); -bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev); - -void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev); -int amdgpu_amdkfd_resume(struct amdgpu_device *rdev); -void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); +int amdgpu_amdkfd_resume(struct amdgpu_device *adev); +void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); -void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev); -void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev); -void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev); +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); +void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); +void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); @@ -62,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +#define read_user_wptr(mmptr, wptr, dst) \ + ({ \ + bool valid = false; \ + if ((mmptr) && (wptr)) { \ + if ((mmptr) == current->mm) { \ + valid = !get_user((dst), (wptr)); \ + } else if (current->mm == NULL) { \ + use_mm(mmptr); \ + valid = !get_user((dst), (wptr)); \ + unuse_mm(mmptr); \ + } \ + } \ + valid; \ + }) + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1a0a5f7cccbc..b9dbbf9cb8b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -29,6 +29,7 @@ #include "cikd.h" #include "cik_sdma.h" #include "amdgpu_ucode.h" +#include "gfx_v7_0.h" #include "gca/gfx_7_2_d.h" #include "gca/gfx_7_2_enum.h" #include "gca/gfx_7_2_sh_mask.h" @@ -38,7 +39,11 @@ #include "gmc/gmc_7_1_sh_mask.h" #include "cik_structs.h" -#define CIK_PIPE_PER_MEC (4) +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; enum { MAX_TRAPID = 8, /* 3 bits in the bitfield. 
*/ @@ -97,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr); + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -127,6 +135,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -151,7 +186,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -185,8 +222,10 @@ static void unlock_srbm(struct kgd_dev *kgd) static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id) { - uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); } @@ -243,18 +282,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); - - lock_srbm(kgd, mec, pipe, 0, 0); - 
WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8)); - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8)); - WREG32(mmCP_HPD_EOP_VMID, 0); - WREG32(mmCP_HPD_EOP_CONTROL, hpd_size); - unlock_srbm(kgd); - + /* amdgpu owns the per-pipe state */ return 0; } @@ -264,8 +292,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) uint32_t mec; uint32_t pipe; - mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; - pipe = (pipe_id % CIK_PIPE_PER_MEC); + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, 0, 0); @@ -300,64 +328,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t wptr_shadow, is_wptr_shadow_valid; struct cik_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, wptr_val, data; m = get_mqd(mqd); - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); - acquire_queue(kgd, pipe_id, queue_id); - WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); - WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); - - WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); - WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); - - WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); - WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); - WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); - - WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); - - WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); - WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); - WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type); - - WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo); - WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi); - WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo); - WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - m->cp_hqd_pq_rptr_report_addr_hi); + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; - WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi); - - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); - - WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); - - WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); - - WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); - WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); + /* Copy userspace write pointer value to register. + * Activate doorbell logic to monitor subsequent changes. 
+ */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); - WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); + if (read_user_wptr(mm, wptr, wptr_val)) + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); - if (is_wptr_shadow_valid) - WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow); + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(mmCP_HQD_ACTIVE, data); - WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); release_queue(kgd); return 0; @@ -436,30 +438,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; - int timeout = utimeout; + enum hqd_dequeue_request_type type; + unsigned long flags, end_jiffies; + int retry; acquire_queue(kgd, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + /* Workaround: If IQ timer is active and the wait time is close to or + * equal to 0, dequeueing is not safe. Wait until either the wait time + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is + * cleared before continuing. Also, ensure wait times are set to at + * least 0x3. + */ + local_irq_save(flags); + preempt_disable(); + retry = 5000; /* wait for 500 usecs at maximum */ + while (true) { + temp = RREG32(mmCP_HQD_IQ_TIMER); + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { + pr_debug("HW is processing IQ\n"); + goto loop; + } + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) + == 3) /* SEM-rearm is safe */ + break; + /* Wait time 3 is safe for CP, but our MMIO read/write + * time is close to 1 microsecond, so check for 10 to + * leave more buffer room + */ + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) + >= 10) + break; + pr_debug("IQ timer is active\n"); + } else + break; +loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; + } + ndelay(100); + --retry; + } + retry = 1000; + while (true) { + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) + break; + pr_debug("Dequeue request is pending\n"); + + if (!retry) { + pr_err("CP HQD dequeue request time out\n"); + break; + } + ndelay(100); + --retry; + } + local_irq_restore(flags); + preempt_enable(); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; - if (timeout <= 0) { - pr_err("kfd: cp queue preemption time out.\n"); + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out\n"); release_queue(kgd); return -ETIME; } - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } release_queue(kgd); @@ -610,6 +681,16 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } +static void set_scratch_backing_va(struct 
kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + lock_srbm(kgd, 0, 0, 0, vmid); + WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); + unlock_srbm(kgd); +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -620,42 +701,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 6697612239c2..309f2419c6d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -28,6 +28,7 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ucode.h" +#include "gfx_v8_0.h" #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" @@ -38,7 +39,11 @@ #include "vi_structs.h" #include "vid.h" -#define VI_PIPE_PER_MEC (4) +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; struct cik_sdma_rlc_registers; @@ -56,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr); + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, @@ -86,6 +94,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + +/* Because of REG_GET_FIELD() being 
used, we put this function in the + * asic specific file. + */ +static int get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -112,7 +147,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -146,8 +183,10 @@ static void unlock_srbm(struct kgd_dev *kgd) static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id) { - uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC); + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); } @@ -205,6 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr) { + /* amdgpu owns the per-pipe state */ return 0; } @@ -214,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) uint32_t mec; uint32_t pipe; - mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; - pipe = (pipe_id % VI_PIPE_PER_MEC); + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, 0, 0); @@ -242,61 +282,66 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) { - struct vi_mqd *m; - uint32_t shadow_wptr, valid_wptr; struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct vi_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, wptr_val, data; m = get_mqd(mqd); - valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); acquire_queue(kgd, pipe_id, queue_id); - WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); - WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); - - WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); - WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); - WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); - WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); - WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); - WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); - 
WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - m->cp_hqd_pq_rptr_report_addr_hi); - - if (valid_wptr > 0) - WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr); - - WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); - - WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi); - WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control); - WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); - WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); - WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events); - - WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo); - WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi); - WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control); - WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset); - WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size); - WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset); - WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size); - - WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); - - WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_ERROR, m->cp_hqd_error); - WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); - WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones); - - WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(mmRLC_CP_SCHEDULERS); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(mmRLC_CP_SCHEDULERS, value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + /* Tonga errata: EOP RPTR/WPTR should be left unmodified. + * This is safe since EOP RPTR==WPTR for any inactive HQD + * on ASICs that do not support context-save. + * EOP writes/reads can start anywhere in the ring. + */ + if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { + WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); + WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); + WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); + } + + for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + /* Copy userspace write pointer value to register. + * Activate doorbell logic to monitor subsequent changes. 
+ */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); + + if (read_user_wptr(mm, wptr, wptr_val)) + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(mmCP_HQD_ACTIVE, data); release_queue(kgd); @@ -348,29 +393,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; - int timeout = utimeout; + enum hqd_dequeue_request_type type; + unsigned long flags, end_jiffies; + int retry; + struct vi_mqd *m = get_mqd(mqd); acquire_queue(kgd, pipe_id, queue_id); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + if (m->cp_hqd_vmid == 0) + WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + /* Workaround: If IQ timer is active and the wait time is close to or + * equal to 0, dequeueing is not safe. Wait until either the wait time + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is + * cleared before continuing. Also, ensure wait times are set to at + * least 0x3. + */ + local_irq_save(flags); + preempt_disable(); + retry = 5000; /* wait for 500 usecs at maximum */ + while (true) { + temp = RREG32(mmCP_HQD_IQ_TIMER); + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { + pr_debug("HW is processing IQ\n"); + goto loop; + } + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) + == 3) /* SEM-rearm is safe */ + break; + /* Wait time 3 is safe for CP, but our MMIO read/write + * time is close to 1 microsecond, so check for 10 to + * leave more buffer room + */ + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) + >= 10) + break; + pr_debug("IQ timer is active\n"); + } else + break; +loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; + } + ndelay(100); + --retry; + } + retry = 1000; + while (true) { + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) + break; + pr_debug("Dequeue request is pending\n"); + + if (!retry) { + pr_err("CP HQD dequeue request time out\n"); + break; + } + ndelay(100); + --retry; + } + local_irq_restore(flags); + preempt_enable(); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; - if (timeout <= 0) { - pr_err("kfd: cp queue preemption time out.\n"); + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); release_queue(kgd); return -ETIME; } - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } release_queue(kgd); @@ -484,6 +602,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return 0; } +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + lock_srbm(kgd, 0, 0, 0, vmid); 
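The hunks above lean heavily on REG_GET_FIELD()/REG_SET_FIELD() (tile-config decoding, the doorbell enable, the CP_HQD_IQ_TIMER checks in the dequeue workaround). For reference, the standalone sketch below shows the mask-and-shift arithmetic those helpers boil down to; the register layout here is invented purely for illustration and is not a real GCA register, and the kernel macros derive the mask/shift pairs from the generated <asic>_sh_mask.h headers rather than taking them as arguments.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical field layout: WAIT_TIME occupies bits [7:3] of a register. */
#define EXAMPLE_REG__WAIT_TIME_MASK   0x000000F8u
#define EXAMPLE_REG__WAIT_TIME__SHIFT 0x3

/* Same shape as the kernel's REG_GET_FIELD()/REG_SET_FIELD() helpers. */
#define GET_FIELD(value, mask, shift) (((value) & (mask)) >> (shift))
#define SET_FIELD(orig, mask, shift, field) \
	(((orig) & ~(mask)) | (((uint32_t)(field) << (shift)) & (mask)))

int main(void)
{
	uint32_t reg = 0;

	/* Pack a field value, then read it back out. */
	reg = SET_FIELD(reg, EXAMPLE_REG__WAIT_TIME_MASK,
			EXAMPLE_REG__WAIT_TIME__SHIFT, 10);
	printf("WAIT_TIME = %u\n",
	       (unsigned)GET_FIELD(reg, EXAMPLE_REG__WAIT_TIME_MASK,
				   EXAMPLE_REG__WAIT_TIME__SHIFT));
	return 0;
}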
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); + unlock_srbm(kgd); +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -494,42 +622,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 1e8e1123ddf4..ce443586a0c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1686,7 +1686,7 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) { uint32_t bios_6_scratch; - bios_6_scratch = RREG32(mmBIOS_SCRATCH_6); + bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6); if (lock) { bios_6_scratch |= ATOM_S6_CRITICAL_STATE; @@ -1696,15 +1696,17 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) bios_6_scratch |= ATOM_S6_ACC_MODE; } - WREG32(mmBIOS_SCRATCH_6, bios_6_scratch); + WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch); } void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev) { uint32_t bios_2_scratch, bios_6_scratch; - bios_2_scratch = RREG32(mmBIOS_SCRATCH_2); - bios_6_scratch = RREG32(mmBIOS_SCRATCH_6); + adev->bios_scratch_reg_offset = mmBIOS_SCRATCH_0; + + bios_2_scratch = RREG32(adev->bios_scratch_reg_offset + 2); + bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6); /* let the bios control the backlight */ bios_2_scratch &= ~ATOM_S2_VRI_BRIGHT_ENABLE; @@ -1715,8 +1717,8 @@ void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev) /* clear the vbios dpms state */ bios_2_scratch &= ~ATOM_S2_DEVICE_DPMS_STATE; - WREG32(mmBIOS_SCRATCH_2, bios_2_scratch); - WREG32(mmBIOS_SCRATCH_6, bios_6_scratch); + WREG32(adev->bios_scratch_reg_offset + 2, bios_2_scratch); + WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch); } void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev) @@ -1724,7 +1726,7 @@ void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev) int i; for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - adev->bios_scratch[i] = RREG32(mmBIOS_SCRATCH_0 + i); + adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i); } void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) @@ -1738,20 +1740,30 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) adev->bios_scratch[7] &= 
~ATOM_S7_ASIC_INIT_COMPLETE_MASK; for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]); + WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); } void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, bool hung) { - u32 tmp = RREG32(mmBIOS_SCRATCH_3); + u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3); if (hung) tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG; else tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG; - WREG32(mmBIOS_SCRATCH_3, tmp); + WREG32(adev->bios_scratch_reg_offset + 3, tmp); +} + +bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev) +{ + u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7); + + if (tmp & ATOM_S7_ASIC_INIT_COMPLETE_MASK) + return false; + else + return true; } /* Atom needs data in little endian format diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 38d0fe32e5cd..b0d5d1d7fdba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -200,6 +200,7 @@ void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, bool hung); +bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev); void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le); int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 4bdda56fccee..f9ffe8ef0cd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -66,41 +66,6 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev) } } -void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i); -} - -void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev) -{ - int i; - - /* - * VBIOS will check ASIC_INIT_COMPLETE bit to decide if - * execute ASIC_Init posting via driver - */ - adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; - - for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); -} - -void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, - bool hung) -{ - u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3); - - if (hung) - tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG; - else - tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG; - - WREG32(adev->bios_scratch_reg_offset + 3, tmp); -} - int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) { struct atom_context *ctx = adev->mode_info.atom_context; @@ -130,3 +95,129 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) ctx->scratch_size_bytes = usage_bytes; return 0; } + +union igp_info { + struct atom_integrated_system_info_v1_11 v11; +}; + +/* + * Return vram width from integrated system info table, if available, + * or 0 if not. 
+ */ +int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + integratedsysteminfo); + u16 data_offset, size; + union igp_info *igp_info; + u8 frev, crev; + + /* get any igp specific overrides */ + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 11: + return igp_info->v11.umachannelnumber * 64; + default: + return 0; + } + } + + return 0; +} + +union firmware_info { + struct atom_firmware_info_v3_1 v31; +}; + +union smu_info { + struct atom_smu_info_v3_1 v31; +}; + +union umc_info { + struct atom_umc_info_v3_1 v31; +}; + +int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + struct amdgpu_pll *spll = &adev->clock.spll; + struct amdgpu_pll *mpll = &adev->clock.mpll; + uint8_t frev, crev; + uint16_t data_offset; + int ret = -EINVAL, index; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union firmware_info *firmware_info = + (union firmware_info *)(mode_info->atom_context->bios + + data_offset); + + adev->clock.default_sclk = + le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz); + adev->clock.default_mclk = + le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz); + + adev->pm.current_sclk = adev->clock.default_sclk; + adev->pm.current_mclk = adev->clock.default_mclk; + + /* not technically a clock, but... */ + adev->mode_info.firmware_flags = + le32_to_cpu(firmware_info->v31.firmware_capability); + + ret = 0; + } + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + smu_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union smu_info *smu_info = + (union smu_info *)(mode_info->atom_context->bios + + data_offset); + + /* system clock */ + spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz); + + spll->reference_div = 0; + spll->min_post_div = 1; + spll->max_post_div = 1; + spll->min_ref_div = 2; + spll->max_ref_div = 0xff; + spll->min_feedback_div = 4; + spll->max_feedback_div = 0xff; + spll->best_vco = 0; + + ret = 0; + } + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + umc_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union umc_info *umc_info = + (union umc_info *)(mode_info->atom_context->bios + + data_offset); + + /* memory clock */ + mpll->reference_freq = le32_to_cpu(umc_info->v31.mem_refclk_10khz); + + mpll->reference_div = 0; + mpll->min_post_div = 1; + mpll->max_post_div = 1; + mpll->min_ref_div = 2; + mpll->max_ref_div = 0xff; + mpll->min_feedback_div = 4; + mpll->max_feedback_div = 0xff; + mpll->best_vco = 0; + + ret = 0; + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index a2c3ebe22c71..288b97e54347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -26,10 +26,8 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); void 
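amdgpu_atomfirmware_get_vram_width() and amdgpu_atomfirmware_get_clock_info() follow the same shape: look up a data table by index, parse its header to learn the format revision, then overlay a union of known layouts and dispatch on crev. The fragment below is a minimal, self-contained illustration of that dispatch; the table layouts and field names are made up for the example and are not the real atomfirmware structures.

#include <stdint.h>
#include <stdio.h>

/* Invented table layouts standing in for the real atomfirmware structs. */
struct example_info_v1 { uint8_t frev, crev; uint16_t channel_number; };
struct example_info_v2 { uint8_t frev, crev; uint16_t channel_number;
			 uint16_t channel_width; };

union example_info {
	struct example_info_v1 v1;
	struct example_info_v2 v2;
};

/* Dispatch on the content revision found in the table header. */
static int example_get_vram_width(const union example_info *info)
{
	switch (info->v1.crev) {
	case 1:
		return info->v1.channel_number * 64; /* assume 64 bits/channel */
	case 2:
		return info->v2.channel_number * info->v2.channel_width;
	default:
		return 0; /* unknown revision: report "not available" */
	}
}

int main(void)
{
	union example_info info = { .v2 = { .frev = 1, .crev = 2,
					    .channel_number = 2,
					    .channel_width = 64 } };

	printf("vram width: %d bits\n", example_get_vram_width(&info));
	return 0;
}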
amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); -void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev); -void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev); -void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, - bool hung); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 1beae5b930d0..63ec1e1bb6aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -40,7 +40,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false); + false, false); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); @@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, n = AMDGPU_BENCHMARK_ITERATIONS; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, - NULL, &sobj); + NULL, 0, &sobj); if (r) { goto out_cleanup; } @@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, goto out_cleanup; } r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, - NULL, &dobj); + NULL, 0, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 365e735f6647..c21adf60a7f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -86,19 +86,6 @@ static bool check_atom_bios(uint8_t *bios, size_t size) return false; } -static bool is_atom_fw(uint8_t *bios) -{ - uint16_t bios_header_start = bios[0x48] | (bios[0x49] << 8); - uint8_t frev = bios[bios_header_start + 2]; - uint8_t crev = bios[bios_header_start + 3]; - - if ((frev < 3) || - ((frev == 3) && (crev < 3))) - return false; - - return true; -} - /* If you boot an IGP board with a discrete card as the primary, * the IGP rom is not accessible via the rom bar as the IGP rom is * part of the system bios. On boot, the system bios puts a @@ -117,7 +104,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev) adev->bios = NULL; vram_base = pci_resource_start(adev->pdev, 0); - bios = ioremap(vram_base, size); + bios = ioremap_wc(vram_base, size); if (!bios) { return false; } @@ -455,6 +442,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) return false; success: - adev->is_atom_fw = is_atom_fw(adev->bios); + adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? 
true : false; return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index a6649874e6ce..59089e027f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -35,33 +35,59 @@ #define AMDGPU_BO_LIST_MAX_PRIORITY 32u #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) -static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, - struct amdgpu_bo_list **result, +static int amdgpu_bo_list_set(struct amdgpu_device *adev, + struct drm_file *filp, + struct amdgpu_bo_list *list, + struct drm_amdgpu_bo_list_entry *info, + unsigned num_entries); + +static void amdgpu_bo_list_release_rcu(struct kref *ref) +{ + unsigned i; + struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list, + refcount); + + for (i = 0; i < list->num_entries; ++i) + amdgpu_bo_unref(&list->array[i].robj); + + mutex_destroy(&list->lock); + kvfree(list->array); + kfree_rcu(list, rhead); +} + +static int amdgpu_bo_list_create(struct amdgpu_device *adev, + struct drm_file *filp, + struct drm_amdgpu_bo_list_entry *info, + unsigned num_entries, int *id) { int r; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_bo_list *list; - *result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); - if (!*result) + list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); + if (!list) return -ENOMEM; + /* initialize bo list*/ + mutex_init(&list->lock); + kref_init(&list->refcount); + r = amdgpu_bo_list_set(adev, filp, list, info, num_entries); + if (r) { + kfree(list); + return r; + } + + /* idr alloc should be called only after initialization of bo list. */ mutex_lock(&fpriv->bo_list_lock); - r = idr_alloc(&fpriv->bo_list_handles, *result, - 1, 0, GFP_KERNEL); + r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); + mutex_unlock(&fpriv->bo_list_lock); if (r < 0) { - mutex_unlock(&fpriv->bo_list_lock); - kfree(*result); + amdgpu_bo_list_free(list); return r; } *id = r; - mutex_init(&(*result)->lock); - (*result)->num_entries = 0; - (*result)->array = NULL; - - mutex_lock(&(*result)->lock); - mutex_unlock(&fpriv->bo_list_lock); - return 0; } @@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) mutex_lock(&fpriv->bo_list_lock); list = idr_remove(&fpriv->bo_list_handles, id); - if (list) { - /* Another user may have a reference to this list still */ - mutex_lock(&list->lock); - mutex_unlock(&list->lock); - amdgpu_bo_list_free(list); - } mutex_unlock(&fpriv->bo_list_lock); + if (list) + kref_put(&list->refcount, amdgpu_bo_list_release_rcu); } static int amdgpu_bo_list_set(struct amdgpu_device *adev, @@ -96,7 +118,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, int r; unsigned long total_size = 0; - array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry)); + array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL); if (!array) return -ENOMEM; memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); @@ -114,7 +136,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, } bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); if (usermm) { @@ -134,11 +156,11 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, entry->tv.bo = &entry->robj->tbo; entry->tv.shared = !entry->robj->prime_shared_count; - if (entry->robj->prefered_domains == 
AMDGPU_GEM_DOMAIN_GDS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) gws_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; total_size += amdgpu_bo_size(entry->robj); @@ -148,7 +170,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, for (i = 0; i < list->num_entries; ++i) amdgpu_bo_unref(&list->array[i].robj); - drm_free_large(list->array); + kvfree(list->array); list->gds_obj = gds_obj; list->gws_obj = gws_obj; @@ -163,7 +185,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, error_free: while (i--) amdgpu_bo_unref(&array[i].robj); - drm_free_large(array); + kvfree(array); return r; } @@ -172,11 +194,21 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id) { struct amdgpu_bo_list *result; - mutex_lock(&fpriv->bo_list_lock); + rcu_read_lock(); result = idr_find(&fpriv->bo_list_handles, id); - if (result) - mutex_lock(&result->lock); - mutex_unlock(&fpriv->bo_list_lock); + + if (result) { + if (kref_get_unless_zero(&result->refcount)) { + rcu_read_unlock(); + mutex_lock(&result->lock); + } else { + rcu_read_unlock(); + result = NULL; + } + } else { + rcu_read_unlock(); + } + return result; } @@ -214,6 +246,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, void amdgpu_bo_list_put(struct amdgpu_bo_list *list) { mutex_unlock(&list->lock); + kref_put(&list->refcount, amdgpu_bo_list_release_rcu); } void amdgpu_bo_list_free(struct amdgpu_bo_list *list) @@ -224,7 +257,7 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list) amdgpu_bo_unref(&list->array[i].robj); mutex_destroy(&list->lock); - drm_free_large(list->array); + kvfree(list->array); kfree(list); } @@ -237,15 +270,15 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_bo_list *args = data; uint32_t handle = args->in.list_handle; - const void __user *uptr = (const void*)(uintptr_t)args->in.bo_info_ptr; + const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); struct drm_amdgpu_bo_list_entry *info; struct amdgpu_bo_list *list; int r; - info = drm_malloc_ab(args->in.bo_number, - sizeof(struct drm_amdgpu_bo_list_entry)); + info = kvmalloc_array(args->in.bo_number, + sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL); if (!info) return -ENOMEM; @@ -273,16 +306,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: - r = amdgpu_bo_list_create(fpriv, &list, &handle); - if (r) - goto error_free; - - r = amdgpu_bo_list_set(adev, filp, list, info, - args->in.bo_number); - amdgpu_bo_list_put(list); + r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, + &handle); if (r) goto error_free; - break; case AMDGPU_BO_LIST_OP_DESTROY: @@ -311,11 +338,11 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, memset(args, 0, sizeof(*args)); args->out.list_handle = handle; - drm_free_large(info); + kvfree(info); return 0; error_free: - drm_free_large(info); + kvfree(info); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index c6dba1eaefbd..fd435a96481c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -124,7 +124,7 @@ static int 
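The bo_list rework above replaces the old lock-the-list-inside-the-idr-lock scheme with a kref-counted object that is looked up under rcu_read_lock() and only pinned if kref_get_unless_zero() succeeds, while kfree_rcu() defers the actual free past any concurrent readers. The kernel-style sketch below condenses that lookup pattern; it is illustrative only and uses a stripped-down structure rather than struct amdgpu_bo_list.

#include <linux/idr.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_obj {
	struct kref refcount;
	struct rcu_head rhead;
};

static void example_obj_release_rcu(struct kref *ref)
{
	struct example_obj *obj = container_of(ref, struct example_obj,
					       refcount);

	kfree_rcu(obj, rhead);	/* defer the free until readers are done */
}

/* Look up an object by id without holding the idr lock across the caller. */
static struct example_obj *example_obj_get(struct idr *idr, int id)
{
	struct example_obj *obj;

	rcu_read_lock();
	obj = idr_find(idr, id);
	/* A concurrent destroy may already have dropped the last reference;
	 * only return the object if we managed to take a new one.
	 */
	if (obj && !kref_get_unless_zero(&obj->refcount))
		obj = NULL;
	rcu_read_unlock();

	return obj;
}

static void example_obj_put(struct example_obj *obj)
{
	kref_put(&obj->refcount, example_obj_release_rcu);
}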
amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, true, domain, flags, NULL, &placement, NULL, - &obj); + 0, &obj); if (ret) { DRM_ERROR("(%d) bo create failed\n", ret); return ret; @@ -166,7 +166,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; - r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains, + r = amdgpu_bo_pin_restricted(obj, obj->preferred_domains, min_offset, max_offset, mcaddr); amdgpu_bo_unreserve(obj); return r; @@ -240,6 +240,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device, return RREG32_DIDT(index); case CGS_IND_REG_GC_CAC: return RREG32_GC_CAC(index); + case CGS_IND_REG_SE_CAC: + return RREG32_SE_CAC(index); case CGS_IND_REG__AUDIO_ENDPT: DRM_ERROR("audio endpt register access not implemented.\n"); return 0; @@ -266,6 +268,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, return WREG32_DIDT(index, value); case CGS_IND_REG_GC_CAC: return WREG32_GC_CAC(index, value); + case CGS_IND_REG_SE_CAC: + return WREG32_SE_CAC(index, value); case CGS_IND_REG__AUDIO_ENDPT: DRM_ERROR("audio endpt register access not implemented.\n"); return; @@ -610,6 +614,17 @@ static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device, return 0; } +static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device, + bool lock) +{ + CGS_FUNC_ADEV; + + if (lock) + mutex_lock(&adev->grbm_idx_mutex); + else + mutex_unlock(&adev->grbm_idx_mutex); +} + static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) @@ -644,7 +659,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, info->version = (uint16_t)le32_to_cpu(header->header.ucode_version); if (CGS_UCODE_ID_CP_MEC == type) - info->image_size = (header->jt_offset) << 2; + info->image_size = le32_to_cpu(header->jt_offset) << 2; info->fw_version = amdgpu_get_firmware_version(cgs_device, type); info->feature_version = (uint16_t)le32_to_cpu(header->ucode_feature_version); @@ -719,7 +734,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, strcpy(fw_name, "amdgpu/polaris12_smc.bin"); break; case CHIP_VEGA10: - strcpy(fw_name, "amdgpu/vega10_smc.bin"); + if ((adev->pdev->device == 0x687f) && + ((adev->pdev->revision == 0xc0) || + (adev->pdev->revision == 0xc1) || + (adev->pdev->revision == 0xc3))) + strcpy(fw_name, "amdgpu/vega10_acg_smc.bin"); + else + strcpy(fw_name, "amdgpu/vega10_smc.bin"); break; default: DRM_ERROR("SMC firmware not supported\n"); @@ -838,6 +859,12 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, return -EINVAL; mode_info = info->mode_info; + if (mode_info) { + /* if the displays are off, vblank time is max */ + mode_info->vblank_time_us = 0xffffffff; + /* always set the reference clock */ + mode_info->ref_clock = adev->clock.spll.reference_freq; + } if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { list_for_each_entry(crtc, @@ -1111,6 +1138,7 @@ static const struct cgs_ops amdgpu_cgs_ops = { .query_system_info = amdgpu_cgs_query_system_info, .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, .enter_safe_mode = amdgpu_cgs_enter_safe_mode, + .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4e6b9501ab0a..60d8bedb694d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -27,81 +27,10 @@ #include <linux/pagemap.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_trace.h" -int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, - u32 ip_instance, u32 ring, - struct amdgpu_ring **out_ring) -{ - /* Right now all IPs have only one instance - multiple rings. */ - if (ip_instance != 0) { - DRM_ERROR("invalid ip instance: %d\n", ip_instance); - return -EINVAL; - } - - switch (ip_type) { - default: - DRM_ERROR("unknown ip type: %d\n", ip_type); - return -EINVAL; - case AMDGPU_HW_IP_GFX: - if (ring < adev->gfx.num_gfx_rings) { - *out_ring = &adev->gfx.gfx_ring[ring]; - } else { - DRM_ERROR("only %d gfx rings are supported now\n", - adev->gfx.num_gfx_rings); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_COMPUTE: - if (ring < adev->gfx.num_compute_rings) { - *out_ring = &adev->gfx.compute_ring[ring]; - } else { - DRM_ERROR("only %d compute rings are supported now\n", - adev->gfx.num_compute_rings); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_DMA: - if (ring < adev->sdma.num_instances) { - *out_ring = &adev->sdma.instance[ring].ring; - } else { - DRM_ERROR("only %d SDMA rings are supported\n", - adev->sdma.num_instances); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_UVD: - *out_ring = &adev->uvd.ring; - break; - case AMDGPU_HW_IP_VCE: - if (ring < adev->vce.num_rings){ - *out_ring = &adev->vce.ring[ring]; - } else { - DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_UVD_ENC: - if (ring < adev->uvd.num_enc_rings){ - *out_ring = &adev->uvd.ring_enc[ring]; - } else { - DRM_ERROR("only %d UVD ENC rings are supported\n", - adev->uvd.num_enc_rings); - return -EINVAL; - } - break; - } - - if (!(*out_ring && (*out_ring)->adev)) { - DRM_ERROR("Ring %d is not initialized on IP %d\n", - ring, ip_type); - return -EINVAL; - } - - return 0; -} - static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, uint32_t *offset) @@ -125,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, *offset = data->offset; - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { amdgpu_bo_unref(&p->uf_entry.robj); @@ -135,7 +64,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, return 0; } -int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; @@ -161,7 +90,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } /* get chunks */ - chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks); + chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; @@ -181,7 +110,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; - chunk_ptr = (void __user *)(uintptr_t)chunk_array[i]; + chunk_ptr = u64_to_user_ptr(chunk_array[i]); if (copy_from_user(&user_chunk, chunk_ptr, sizeof(struct drm_amdgpu_cs_chunk))) { ret = -EFAULT; @@ -192,9 +121,9 
@@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = (void __user *)(uintptr_t)user_chunk.chunk_data; + cdata = u64_to_user_ptr(user_chunk.chunk_data); - p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); + p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); if (p->chunks[i].kdata == NULL) { ret = -ENOMEM; i--; @@ -226,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) break; case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: break; default: @@ -247,7 +178,7 @@ free_all_kdata: i = p->nchunks - 1; free_partial_kdata: for (; i >= 0; i--) - drm_free_large(p->chunks[i].kdata); + kvfree(p->chunks[i].kdata); kfree(p->chunks); p->chunks = NULL; p->nchunks = 0; @@ -292,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes) * ticks. The accumulated microseconds (us) are converted to bytes and * returned. */ -static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) +static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, + u64 *max_bytes, + u64 *max_vis_bytes) { s64 time_us, increment_us; - u64 max_bytes; u64 free_vram, total_vram, used_vram; /* Allow a maximum of 200 accumulated ms. This is basically per-IB @@ -307,11 +239,14 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) - return 0; + if (!adev->mm_stats.log2_max_MBps) { + *max_bytes = 0; + *max_vis_bytes = 0; + return; + } total_vram = adev->mc.real_vram_size - adev->vram_pin_size; - used_vram = atomic64_read(&adev->vram_usage); + used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; spin_lock(&adev->mm_stats.lock); @@ -349,23 +284,46 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); } - /* This returns 0 if the driver is in debt to disallow (optional) + /* This is set to 0 if the driver is in debt to disallow (optional) * buffer moves. */ - max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + + /* Do the same for visible VRAM if half of it is free */ + if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { + u64 total_vis_vram = adev->mc.visible_vram_size; + u64 used_vis_vram = + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); + + if (used_vis_vram < total_vis_vram) { + u64 free_vis_vram = total_vis_vram - used_vis_vram; + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + + increment_us, us_upper_bound); + + if (free_vis_vram >= total_vis_vram / 2) + adev->mm_stats.accum_us_vis = + max(bytes_to_us(adev, free_vis_vram / 2), + adev->mm_stats.accum_us_vis); + } + + *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis); + } else { + *max_vis_bytes = 0; + } spin_unlock(&adev->mm_stats.lock); - return max_bytes; } /* Report how many bytes have really been moved for the last command * submission. This can result in a debt that can stop buffer migrations * temporarily. 
*/ -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes) +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes) { spin_lock(&adev->mm_stats.lock); adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); + adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes); spin_unlock(&adev->mm_stats.lock); } @@ -373,7 +331,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; uint32_t domain; int r; @@ -383,17 +341,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, /* Don't move this buffer if we have depleted our allowance * to move it. Don't move anything if the threshold is zero. */ - if (p->bytes_moved < p->bytes_moved_threshold) - domain = bo->prefered_domains; - else + if (p->bytes_moved < p->bytes_moved_threshold) { + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { + /* And don't move a CPU_ACCESS_REQUIRED BO to limited + * visible VRAM if we've depleted our allowance to do + * that. + */ + if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) + domain = bo->preferred_domains; + else + domain = bo->allowed_domains; + } else { + domain = bo->preferred_domains; + } + } else { domain = bo->allowed_domains; + } retry: amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + p->bytes_moved_vis += bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; @@ -419,7 +395,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; + bool update_bytes_moved_vis; uint32_t other; /* If we reached our current BO we can forget it */ @@ -439,10 +416,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); + update_bytes_moved_vis = + adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (update_bytes_moved_vis) + p->bytes_moved_vis += bytes_moved; if (unlikely(r)) break; @@ -505,7 +489,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, return r; if (binding_userptr) { - drm_free_large(lobj->user_pages); + kvfree(lobj->user_pages); lobj->user_pages = NULL; } } @@ -566,12 +550,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
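The threshold logic above is easier to follow with the units spelled out: the driver accumulates an allowance in microseconds (capped at us_upper_bound), converts it to bytes using the measured copy throughput, and amdgpu_cs_report_moved_bytes() later charges the bytes actually moved back against that allowance. The standalone sketch below models the accumulate/convert/charge cycle; the shift-based conversion assumes the 1 MB/s ≈ 1 byte/µs shortcut implied by log2_max_MBps, and the structure is a simplified stand-in for the driver's mm_stats.

#include <stdint.h>
#include <stdio.h>

#define US_UPPER_BOUND 200000	/* allow at most 200 accumulated ms */

struct move_budget {
	int64_t accum_us;	/* accumulated allowance, in microseconds */
	unsigned log2_max_MBps;	/* measured copy throughput, log2(MB/s) */
};

/* 1 MB/s moves ~1 byte per microsecond, so bytes ~= us << log2_max_MBps. */
static uint64_t us_to_bytes(const struct move_budget *b, int64_t us)
{
	return (uint64_t)us << b->log2_max_MBps;
}

static int64_t bytes_to_us(const struct move_budget *b, uint64_t bytes)
{
	return (int64_t)(bytes >> b->log2_max_MBps);
}

/* Grow the allowance for elapsed time, clamped to the upper bound. */
static uint64_t budget_threshold(struct move_budget *b, int64_t increment_us)
{
	b->accum_us += increment_us;
	if (b->accum_us > US_UPPER_BOUND)
		b->accum_us = US_UPPER_BOUND;
	return us_to_bytes(b, b->accum_us);
}

/* Charge the bytes actually moved back against the allowance. */
static void budget_report_moved(struct move_budget *b, uint64_t num_bytes)
{
	b->accum_us -= bytes_to_us(b, num_bytes);
}

int main(void)
{
	struct move_budget b = { .accum_us = 0, .log2_max_MBps = 10 }; /* ~1 GB/s */

	printf("threshold: %llu bytes\n",
	       (unsigned long long)budget_threshold(&b, 50000));
	budget_report_moved(&b, 16 << 20);	/* a 16 MiB move eats into it */
	printf("allowance left: %lld us\n", (long long)b.accum_us);
	return 0;
}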
*p, &e->user_invalidated) && e->user_pages) { /* We acquired a page array, but somebody - * invalidated it. Free it an try again + * invalidated it. Free it and try again */ release_pages(e->user_pages, e->robj->tbo.ttm->num_pages, false); - drm_free_large(e->user_pages); + kvfree(e->user_pages); e->user_pages = NULL; } @@ -597,12 +581,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_free_pages; } - /* Fill the page arrays for all useptrs. */ + /* Fill the page arrays for all userptrs. */ list_for_each_entry(e, &need_pages, tv.head) { struct ttm_tt *ttm = e->robj->tbo.ttm; - e->user_pages = drm_calloc_large(ttm->num_pages, - sizeof(struct page*)); + e->user_pages = kvmalloc_array(ttm->num_pages, + sizeof(struct page*), + GFP_KERNEL | __GFP_ZERO); if (!e->user_pages) { r = -ENOMEM; DRM_ERROR("calloc failure in %s\n", __func__); @@ -612,7 +597,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); if (r) { DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - drm_free_large(e->user_pages); + kvfree(e->user_pages); e->user_pages = NULL; goto error_free_pages; } @@ -622,8 +607,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, list_splice(&need_pages, &p->validated); } - p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, + &p->bytes_moved_vis_threshold); p->bytes_moved = 0; + p->bytes_moved_vis = 0; p->evictable = list_last_entry(&p->validated, struct amdgpu_bo_list_entry, tv.head); @@ -647,8 +634,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; } - amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved); - + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, + p->bytes_moved_vis); fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); @@ -687,10 +674,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) { - amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm); + if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - } error_free_pages: @@ -708,7 +693,7 @@ error_free_pages: release_pages(e->user_pages, e->robj->tbo.ttm->num_pages, false); - drm_free_large(e->user_pages); + kvfree(e->user_pages); } } @@ -738,21 +723,23 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. 
**/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, + bool backoff) { - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; unsigned i; - if (!error) { - amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); - + if (!error) ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->fence); - } else if (backoff) { + else if (backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - } + + for (i = 0; i < parser->num_post_dep_syncobjs; i++) + drm_syncobj_put(parser->post_dep_syncobjs[i]); + kfree(parser->post_dep_syncobjs); + dma_fence_put(parser->fence); if (parser->ctx) @@ -761,7 +748,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_bo_list_put(parser->bo_list); for (i = 0; i < parser->nchunks; i++) - drm_free_large(parser->chunks[i].kdata); + kvfree(parser->chunks[i].kdata); kfree(parser->chunks); if (parser->job) amdgpu_job_free(parser->job); @@ -800,7 +787,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (amdgpu_sriov_vf(adev)) { struct dma_fence *f; - bo_va = vm->csa_bo_va; + + bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) @@ -837,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) } - r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); + r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync); if (amdgpu_vm_debug && p->bo_list) { /* Invalidate all BOs to test for userspace bugs */ @@ -916,9 +904,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return -EINVAL; } - r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, - &ring); + r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type, + chunk_ib->ip_instance, chunk_ib->ring, &ring); if (r) return r; @@ -995,62 +982,151 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return 0; } -static int amdgpu_cs_dependencies(struct amdgpu_device *adev, - struct amdgpu_cs_parser *p) +static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - int i, j, r; + unsigned num_deps; + int i, r; + struct drm_amdgpu_cs_chunk_dep *deps; - for (i = 0; i < p->nchunks; ++i) { - struct drm_amdgpu_cs_chunk_dep *deps; - struct amdgpu_cs_chunk *chunk; - unsigned num_deps; + deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); - chunk = &p->chunks[i]; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES) - continue; + for (i = 0; i < num_deps; ++i) { + struct amdgpu_ring *ring; + struct amdgpu_ctx *ctx; + struct dma_fence *fence; - deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_dep); + ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); + if (ctx == NULL) + return -EINVAL; - for (j = 0; j < num_deps; ++j) { - struct amdgpu_ring *ring; - struct amdgpu_ctx *ctx; - struct dma_fence *fence; + r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr, + deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } - r = amdgpu_cs_get_ring(adev, deps[j].ip_type, - deps[j].ip_instance, - deps[j].ring, &ring); + fence = amdgpu_ctx_get_fence(ctx, ring, + deps[i].handle); + if (IS_ERR(fence)) { + r = PTR_ERR(fence); + 
amdgpu_ctx_put(ctx); + return r; + } else if (fence) { + r = amdgpu_sync_fence(p->adev, &p->job->sync, + fence); + dma_fence_put(fence); + amdgpu_ctx_put(ctx); if (r) return r; + } + } + return 0; +} - ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id); - if (ctx == NULL) - return -EINVAL; +static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, + uint32_t handle) +{ + int r; + struct dma_fence *fence; + r = drm_syncobj_find_fence(p->filp, handle, &fence); + if (r) + return r; - fence = amdgpu_ctx_get_fence(ctx, ring, - deps[j].handle); - if (IS_ERR(fence)) { - r = PTR_ERR(fence); - amdgpu_ctx_put(ctx); - return r; + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence); + dma_fence_put(fence); - } else if (fence) { - r = amdgpu_sync_fence(adev, &p->job->sync, - fence); - dma_fence_put(fence); - amdgpu_ctx_put(ctx); - if (r) - return r; - } + return r; +} + +static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + unsigned num_deps; + int i, r; + struct drm_amdgpu_cs_chunk_sem *deps; + + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + if (r) + return r; + } + return 0; +} + +static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + unsigned num_deps; + int i; + struct drm_amdgpu_cs_chunk_sem *deps; + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + p->post_dep_syncobjs = kmalloc_array(num_deps, + sizeof(struct drm_syncobj *), + GFP_KERNEL); + p->num_post_dep_syncobjs = 0; + + if (!p->post_dep_syncobjs) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_dep_syncobjs[i]) + return -EINVAL; + p->num_post_dep_syncobjs++; + } + return 0; +} + +static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + struct amdgpu_cs_parser *p) +{ + int i, r; + + for (i = 0; i < p->nchunks; ++i) { + struct amdgpu_cs_chunk *chunk; + + chunk = &p->chunks[i]; + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { + r = amdgpu_cs_process_fence_dep(p, chunk); + if (r) + return r; + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + r = amdgpu_cs_process_syncobj_in_dep(p, chunk); + if (r) + return r; + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { + r = amdgpu_cs_process_syncobj_out_dep(p, chunk); + if (r) + return r; } } return 0; } +static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) +{ + int i; + + for (i = 0; i < p->num_post_dep_syncobjs; ++i) + drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); +} + static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { @@ -1071,20 +1147,22 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->owner = p->filp; job->fence_ctx = entity->fence_context; p->fence = dma_fence_get(&job->base.s_fence->finished); + + amdgpu_cs_post_dependencies(p); + cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); - amdgpu_cs_parser_fini(p, 0, true); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); - return 0; } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = 
dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; @@ -1092,6 +1170,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; parser.adev = adev; parser.filp = filp; @@ -1130,10 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; r = amdgpu_cs_submit(&parser, cs); - if (r) - goto out; - return 0; out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); return r; @@ -1153,21 +1230,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; - r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); - if (r) - return r; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; + r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, + wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); if (IS_ERR(fence)) r = PTR_ERR(fence); @@ -1203,15 +1287,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, struct dma_fence *fence; int r; - r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance, - user->ring, &ring); - if (r) - return ERR_PTR(r); - ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id); if (ctx == NULL) return ERR_PTR(-EINVAL); + r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type, + user->ip_instance, user->ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return ERR_PTR(r); + } + fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); amdgpu_ctx_put(ctx); @@ -1332,19 +1418,22 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_wait_fences *wait = data; uint32_t fence_count = wait->in.fence_count; struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; /* Get the fences from userspace */ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), GFP_KERNEL); if (fences == NULL) return -ENOMEM; - fences_user = (void __user *)(uintptr_t)(wait->in.fences); + fences_user = u64_to_user_ptr(wait->in.fences); if (copy_from_user(fences, fences_user, sizeof(struct drm_amdgpu_fence) * fence_count)) { r = -EFAULT; @@ -1397,7 +1486,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, addr > mapping->last) continue; - *bo = lobj->bo_va->bo; + *bo = lobj->bo_va->base.bo; return mapping; } @@ -1406,7 +1495,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, addr > mapping->last) continue; - *bo = lobj->bo_va->bo; + *bo = lobj->bo_va->base.bo; return mapping; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 90d1ac8a80f8..a11e44340b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
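Taken together, the new chunk handlers split dependency handling into three stages: IN syncobjs are resolved to fences and merged into the job's sync object during parsing, OUT syncobjs are merely looked up and stashed, and only once the job's finished fence exists does amdgpu_cs_post_dependencies() install it into each OUT syncobj, with parser_fini dropping the stashed references. The sketch below restates that flow in one place using the drm_syncobj calls from the hunks above; error paths, includes and the surrounding parser state are elided.

/* Condensed view of the syncobj bookkeeping added above (illustrative only). */

static int example_add_in_syncobj(struct amdgpu_cs_parser *p, uint32_t handle)
{
	struct dma_fence *fence;
	int r;

	/* IN: resolve the syncobj to its current fence and wait on it. */
	r = drm_syncobj_find_fence(p->filp, handle, &fence);
	if (r)
		return r;

	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
	dma_fence_put(fence);
	return r;
}

static int example_stash_out_syncobj(struct amdgpu_cs_parser *p, int i,
				     uint32_t handle)
{
	/* OUT: just take a reference now; there is no fence to attach yet. */
	p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, handle);
	return p->post_dep_syncobjs[i] ? 0 : -EINVAL;
}

static void example_signal_out_syncobjs(struct amdgpu_cs_parser *p)
{
	int i;

	/* After submission: publish the job's finished fence to every OUT
	 * syncobj; parser_fini later drops the references taken above.
	 */
	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
}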
@@ -52,12 +52,20 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) struct amd_sched_rq *rq; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + + if (ring == &adev->gfx.kiq.ring) + continue; + r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, rq, amdgpu_sched_jobs); if (r) goto failed; } + r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr); + if (r) + goto failed; + return 0; failed: @@ -86,6 +94,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) for (i = 0; i < adev->num_rings; i++) amd_sched_entity_fini(&adev->rings[i]->sched, &ctx->rings[i].entity); + + amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 43ca16b6eee2..e630d918fefc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -53,9 +53,18 @@ #include "bif/bif_4_1_d.h" #include <linux/pci.h> #include <linux/firmware.h> +#include "amdgpu_vf_error.h" + +#include "amdgpu_amdkfd.h" + +MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); + +#define AMDGPU_RESUME_MS 2000 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); static const char *amdgpu_asic_name[] = { "TAHITI", @@ -77,6 +86,7 @@ static const char *amdgpu_asic_name[] = { "POLARIS11", "POLARIS12", "VEGA10", + "RAVEN", "LAST", }; @@ -121,6 +131,10 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, { trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); + if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { + adev->last_mm_index = v; + } + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { BUG_ON(in_interrupt()); return amdgpu_virt_kiq_wreg(adev, reg, v); @@ -136,6 +150,10 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); } + + if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { + udelay(500); + } } u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) @@ -150,6 +168,9 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { + if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { + adev->last_mm_index = v; + } if ((reg * 4) < adev->rio_mem_size) iowrite32(v, adev->rio_mem + (reg * 4)); @@ -157,6 +178,10 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4)); iowrite32(v, adev->rio_mem + (mmMM_DATA * 4)); } + + if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { + udelay(500); + } } /** @@ -311,51 +336,16 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) { - int r; - - if (adev->vram_scratch.robj == NULL) { - r = amdgpu_bo_create(adev, AMDGPU_GPU_PAGE_SIZE, - PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->vram_scratch.robj); - if (r) { - return r; - } - } - - r = amdgpu_bo_reserve(adev->vram_scratch.robj, false); - if (unlikely(r != 0)) - return r; - r = 
amdgpu_bo_pin(adev->vram_scratch.robj, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vram_scratch.gpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->vram_scratch.robj); - return r; - } - r = amdgpu_bo_kmap(adev->vram_scratch.robj, - (void **)&adev->vram_scratch.ptr); - if (r) - amdgpu_bo_unpin(adev->vram_scratch.robj); - amdgpu_bo_unreserve(adev->vram_scratch.robj); - - return r; + return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->vram_scratch.robj, + &adev->vram_scratch.gpu_addr, + (void **)&adev->vram_scratch.ptr); } static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) { - int r; - - if (adev->vram_scratch.robj == NULL) { - return; - } - r = amdgpu_bo_reserve(adev->vram_scratch.robj, true); - if (likely(r == 0)) { - amdgpu_bo_kunmap(adev->vram_scratch.robj); - amdgpu_bo_unpin(adev->vram_scratch.robj); - amdgpu_bo_unreserve(adev->vram_scratch.robj); - } - amdgpu_bo_unref(&adev->vram_scratch.robj); + amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL); } /** @@ -478,9 +468,8 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, /* * amdgpu_wb_*() - * Writeback is the the method by which the the GPU updates special pages - * in memory with the status of certain GPU events (fences, ring pointers, - * etc.). + * Writeback is the method by which the GPU updates special pages in memory + * with the status of certain GPU events (fences, ring pointers,etc.). */ /** @@ -506,7 +495,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * Disables Writeback and frees the Writeback memory (all asics). + * Initializes writeback and allocates writeback memory (all asics). * Used at driver startup. * Returns 0 on success or an -error on failure. */ @@ -515,7 +504,8 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) int r; if (adev->wb.wb_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t), + /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */ + r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->wb.wb_obj, &adev->wb.gpu_addr, (void **)&adev->wb.wb); @@ -546,32 +536,10 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) { unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); - if (offset < adev->wb.num_wb) { - __set_bit(offset, adev->wb.used); - *wb = offset; - return 0; - } else { - return -EINVAL; - } -} -/** - * amdgpu_wb_get_64bit - Allocate a wb entry - * - * @adev: amdgpu_device pointer - * @wb: wb index - * - * Allocate a wb slot for use by the driver (all asics). - * Returns 0 on success or -EINVAL on failure. 
- */ -int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb) -{ - unsigned long offset = bitmap_find_next_zero_area_off(adev->wb.used, - adev->wb.num_wb, 0, 2, 7, 0); - if ((offset + 1) < adev->wb.num_wb) { + if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); - __set_bit(offset + 1, adev->wb.used); - *wb = offset; + *wb = offset * 8; /* convert to dw offset */ return 0; } else { return -EINVAL; @@ -593,28 +561,12 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) } /** - * amdgpu_wb_free_64bit - Free a wb entry - * - * @adev: amdgpu_device pointer - * @wb: wb index - * - * Free a wb slot allocated for use by the driver (all asics) - */ -void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) -{ - if ((wb + 1) < adev->wb.num_wb) { - __clear_bit(wb, adev->wb.used); - __clear_bit(wb + 1, adev->wb.used); - } -} - -/** * amdgpu_vram_location - try to find VRAM location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * @base: base address at which to put VRAM * - * Function will place try to place VRAM at base address provided + * Function will try to place VRAM at base address provided * as parameter (which is so far either PCI aperture address or * for IGP TOM base address). * @@ -636,7 +588,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) * ones) * * Note: IGP TOM addr should be the same as the aperture addr, we don't - * explicitly check for that thought. + * explicitly check for that though. * * FIXME: when reducing VRAM size align new size on power of 2. */ @@ -659,7 +611,7 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 } /** - * amdgpu_gtt_location - try to find GTT location + * amdgpu_gart_location - try to find GTT location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * @@ -670,28 +622,28 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 * * FIXME: when reducing GTT size align new size on power of 2. 
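The writeback rework above is easy to misread: amdgpu_wb_init() now allocates AMDGPU_MAX_WB * sizeof(uint32_t) * 8 bytes because each slot is 256 bits wide, and amdgpu_wb_get() returns the slot index multiplied by 8 so callers get a dword offset rather than a slot number. A minimal sketch of that bookkeeping, with purely illustrative ex_* names and a toy bitmap:

#include <stdint.h>

#define EX_NUM_WB	16	/* stand-in for adev->wb.num_wb */
#define EX_DW_PER_SLOT	8	/* one 256-bit slot = 8 x 32-bit dwords */

static uint32_t ex_used;	/* stand-in for the adev->wb.used bitmap */

/* on success returns a dword offset into the writeback buffer, mirroring
 * the reworked amdgpu_wb_get(); returns -1 when every slot is taken */
static int ex_wb_get(void)
{
	for (int i = 0; i < EX_NUM_WB; i++) {
		if (!(ex_used & (1u << i))) {
			ex_used |= 1u << i;
			return i * EX_DW_PER_SLOT;	/* convert to dw offset */
		}
	}
	return -1;
}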
*/ -void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) +void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { u64 size_af, size_bf; - size_af = ((adev->mc.mc_mask - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align; - size_bf = mc->vram_start & ~mc->gtt_base_align; + size_af = adev->mc.mc_mask - mc->vram_end; + size_bf = mc->vram_start; if (size_bf > size_af) { - if (mc->gtt_size > size_bf) { + if (mc->gart_size > size_bf) { dev_warn(adev->dev, "limiting GTT\n"); - mc->gtt_size = size_bf; + mc->gart_size = size_bf; } - mc->gtt_start = 0; + mc->gart_start = 0; } else { - if (mc->gtt_size > size_af) { + if (mc->gart_size > size_af) { dev_warn(adev->dev, "limiting GTT\n"); - mc->gtt_size = size_af; + mc->gart_size = size_af; } - mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align; + mc->gart_start = mc->vram_end + 1; } - mc->gtt_end = mc->gtt_start + mc->gtt_size - 1; + mc->gart_end = mc->gart_start + mc->gart_size - 1; dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n", - mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end); + mc->gart_size >> 20, mc->gart_start, mc->gart_end); } /* @@ -714,7 +666,12 @@ bool amdgpu_need_post(struct amdgpu_device *adev) adev->has_hw_reset = false; return true; } - /* then check MEM_SIZE, in case the crtcs are off */ + + /* bios scratch used on CIK+ */ + if (adev->asic_type >= CHIP_BONAIRE) + return amdgpu_atombios_scratch_need_asic_init(adev); + + /* check MEM_SIZE for older asics */ reg = amdgpu_asic_get_config_memsize(adev); if ((reg != 0) && (reg != 0xffffffff)) @@ -1025,19 +982,6 @@ static unsigned int amdgpu_vga_set_decode(void *cookie, bool state) return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; } -/** - * amdgpu_check_pot_argument - check that argument is a power of two - * - * @arg: value to check - * - * Validates that a certain argument is a power of two (all asics). - * Returns true if argument is valid. 
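amdgpu_gart_location() above drops the gtt_base_align handling entirely: it measures the address space below and above VRAM, puts the GART aperture in the larger gap, and clamps gart_size to whatever fits. The standalone sketch below (hypothetical ex_* names, made-up sizes) reproduces that rule; with an 8 GB VRAM aperture starting at 0 and a 40-bit MC address space it prints a 256 MB GART placed at vram_end + 1, matching the dev_info() format used by the real function.

#include <stdint.h>
#include <stdio.h>

struct ex_mc {
	uint64_t mc_mask;			/* highest MC address */
	uint64_t vram_start, vram_end;		/* inclusive VRAM range */
	uint64_t gart_size;
	uint64_t gart_start, gart_end;
};

static void ex_gart_location(struct ex_mc *mc)
{
	uint64_t size_af = mc->mc_mask - mc->vram_end;	/* room above VRAM */
	uint64_t size_bf = mc->vram_start;		/* room below VRAM */

	if (size_bf > size_af) {
		if (mc->gart_size > size_bf)
			mc->gart_size = size_bf;	/* limit GTT */
		mc->gart_start = 0;
	} else {
		if (mc->gart_size > size_af)
			mc->gart_size = size_af;	/* limit GTT */
		mc->gart_start = mc->vram_end + 1;
	}
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
}

int main(void)
{
	struct ex_mc mc = {
		.mc_mask    = (1ULL << 40) - 1,
		.vram_start = 0,
		.vram_end   = 8ULL * 1024 * 1024 * 1024 - 1,
		.gart_size  = 256ULL * 1024 * 1024,
	};

	ex_gart_location(&mc);
	printf("GART: %lluM 0x%016llX - 0x%016llX\n",
	       (unsigned long long)(mc.gart_size >> 20),
	       (unsigned long long)mc.gart_start,
	       (unsigned long long)mc.gart_end);
	return 0;
}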
- */ -static bool amdgpu_check_pot_argument(int arg) -{ - return (arg & (arg - 1)) == 0; -} - static void amdgpu_check_block_size(struct amdgpu_device *adev) { /* defines number of bits in page table versus page directory, @@ -1067,7 +1011,11 @@ def_value: static void amdgpu_check_vm_size(struct amdgpu_device *adev) { - if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { + /* no need to check the default value */ + if (amdgpu_vm_size == -1) + return; + + if (!is_power_of_2(amdgpu_vm_size)) { dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", amdgpu_vm_size); goto def_value; @@ -1108,19 +1056,31 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); amdgpu_sched_jobs = 4; - } else if (!amdgpu_check_pot_argument(amdgpu_sched_jobs)){ + } else if (!is_power_of_2(amdgpu_sched_jobs)){ dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", amdgpu_sched_jobs); amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); } - if (amdgpu_gart_size != -1) { + if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) { + /* gart size must be greater or equal to 32M */ + dev_warn(adev->dev, "gart size (%d) too small\n", + amdgpu_gart_size); + amdgpu_gart_size = -1; + } + + if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { /* gtt size must be greater or equal to 32M */ - if (amdgpu_gart_size < 32) { - dev_warn(adev->dev, "gart size (%d) too small\n", - amdgpu_gart_size); - amdgpu_gart_size = -1; - } + dev_warn(adev->dev, "gtt size (%d) too small\n", + amdgpu_gtt_size); + amdgpu_gtt_size = -1; + } + + /* valid range is between 4 and 9 inclusive */ + if (amdgpu_vm_fragment_size != -1 && + (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { + dev_warn(adev->dev, "valid range is between 4 and 9\n"); + amdgpu_vm_fragment_size = -1; } amdgpu_check_vm_size(adev); @@ -1128,7 +1088,7 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) amdgpu_check_block_size(adev); if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 || - !amdgpu_check_pot_argument(amdgpu_vram_page_split))) { + !is_power_of_2(amdgpu_vram_page_split))) { dev_warn(adev->dev, "invalid VRAM page split (%d)\n", amdgpu_vram_page_split); amdgpu_vram_page_split = 1024; @@ -1152,16 +1112,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero return; if (state == VGA_SWITCHEROO_ON) { - unsigned d3_delay = dev->pdev->d3_delay; - pr_info("amdgpu: switched on\n"); /* don't suspend or resume card normally */ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; amdgpu_device_resume(dev, true, true); - dev->pdev->d3_delay = d3_delay; - dev->switch_power_state = DRM_SWITCH_POWER_ON; drm_kms_helper_poll_enable(dev); } else { @@ -1342,6 +1298,9 @@ int amdgpu_ip_block_add(struct amdgpu_device *adev, if (!ip_block_version) return -EINVAL; + DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks, + ip_block_version->funcs->name); + adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; return 0; @@ -1392,6 +1351,104 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) } } +static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + const struct gpu_info_firmware_header_v1_0 *hdr; + + adev->firmware.gpu_info_fw = NULL; + + switch (adev->asic_type) { + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS11: + case CHIP_POLARIS10: + case CHIP_POLARIS12: + case 
CHIP_CARRIZO: + case CHIP_STONEY: +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_VERDE: + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_OLAND: + case CHIP_HAINAN: +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: +#endif + default: + return 0; + case CHIP_VEGA10: + chip_name = "vega10"; + break; + case CHIP_RAVEN: + chip_name = "raven"; + break; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); + err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev); + if (err) { + dev_err(adev->dev, + "Failed to load gpu_info firmware \"%s\"\n", + fw_name); + goto out; + } + err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw); + if (err) { + dev_err(adev->dev, + "Failed to validate gpu_info firmware \"%s\"\n", + fw_name); + goto out; + } + + hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; + amdgpu_ucode_print_gpu_info_hdr(&hdr->header); + + switch (hdr->version_major) { + case 1: + { + const struct gpu_info_firmware_v1_0 *gpu_info_fw = + (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = + le32_to_cpu(gpu_info_fw->gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = + le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = + le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = + le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); + break; + } + default: + dev_err(adev->dev, + "Unsupported gpu_info table %d\n", hdr->header.ucode_version); + err = -EINVAL; + goto out; + } +out: + return err; +} + static int amdgpu_early_init(struct amdgpu_device *adev) { int i, r; @@ -1444,8 +1501,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return r; break; #endif - case CHIP_VEGA10: - adev->family = AMDGPU_FAMILY_AI; + case CHIP_VEGA10: + case CHIP_RAVEN: + if (adev->asic_type == CHIP_RAVEN) + adev->family = AMDGPU_FAMILY_RV; + else + adev->family = AMDGPU_FAMILY_AI; r = soc15_set_ip_blocks(adev); if (r) @@ -1456,6 +1517,10 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return -EINVAL; } + r = amdgpu_device_parse_gpu_info_fw(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) @@ -1464,7 +1529,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { - DRM_ERROR("disabled ip block: %d\n", i); + DRM_ERROR("disabled ip block: %d <%s>\n", + i, 
adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; } else { if (adev->ip_blocks[i].version->funcs->early_init) { @@ -1552,22 +1618,24 @@ static int amdgpu_init(struct amdgpu_device *adev) return 0; } -static int amdgpu_late_init(struct amdgpu_device *adev) +static void amdgpu_fill_reset_magic(struct amdgpu_device *adev) +{ + memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); +} + +static bool amdgpu_check_vram_lost(struct amdgpu_device *adev) +{ + return !!memcmp(adev->gart.ptr, adev->reset_magic, + AMDGPU_RESET_MAGIC_NUM); +} + +static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) { int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->funcs->late_init) { - r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); - if (r) { - DRM_ERROR("late_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - adev->ip_blocks[i].status.late_initialized = true; - } /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { @@ -1581,6 +1649,31 @@ static int amdgpu_late_init(struct amdgpu_device *adev) } } } + return 0; +} + +static int amdgpu_late_init(struct amdgpu_device *adev) +{ + int i = 0, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (adev->ip_blocks[i].version->funcs->late_init) { + r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); + if (r) { + DRM_ERROR("late_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.late_initialized = true; + } + } + + mod_delayed_work(system_wq, &adev->late_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); + + amdgpu_fill_reset_magic(adev); return 0; } @@ -1672,6 +1765,13 @@ static int amdgpu_fini(struct amdgpu_device *adev) return 0; } +static void amdgpu_late_init_func_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, late_init_work.work); + amdgpu_late_set_cg_state(adev); +} + int amdgpu_suspend(struct amdgpu_device *adev) { int i, r; @@ -1717,19 +1817,25 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) { int i, r; - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; + static enum amd_ip_block_type ip_order[] = { + AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_COMMON, + AMD_IP_BLOCK_TYPE_IH, + }; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + int j; + struct amdgpu_ip_block *block; - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; + for (j = 0; j < adev->num_ip_blocks; j++) { + block = &adev->ip_blocks[j]; + + if (block->version->type != ip_order[i] || + !block->status.valid) + continue; + + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); } } @@ -1740,33 +1846,68 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) { int i, r; + static enum amd_ip_block_type ip_order[] = { + 
AMD_IP_BLOCK_TYPE_SMC, + AMD_IP_BLOCK_TYPE_DCE, + AMD_IP_BLOCK_TYPE_GFX, + AMD_IP_BLOCK_TYPE_SDMA, + AMD_IP_BLOCK_TYPE_UVD, + AMD_IP_BLOCK_TYPE_VCE + }; + + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + int j; + struct amdgpu_ip_block *block; + + for (j = 0; j < adev->num_ip_blocks; j++) { + block = &adev->ip_blocks[j]; + + if (block->version->type != ip_order[i] || + !block->status.valid) + continue; + + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); + } + } + + return 0; +} + +static int amdgpu_resume_phase1(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) - continue; - - r = adev->ip_blocks[i].version->funcs->hw_init(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } } } return 0; } -static int amdgpu_resume(struct amdgpu_device *adev) +static int amdgpu_resume_phase2(struct amdgpu_device *adev) { int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) + continue; r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", @@ -1778,6 +1919,18 @@ static int amdgpu_resume(struct amdgpu_device *adev) return 0; } +static int amdgpu_resume(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_resume_phase1(adev); + if (r) + return r; + r = amdgpu_resume_phase2(adev); + + return r; +} + static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { if (adev->is_atom_fw) { @@ -1817,7 +1970,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->flags = flags; adev->asic_type = flags & AMD_ASIC_MASK; adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; - adev->mc.gtt_size = 512 * 1024 * 1024; + adev->mc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; adev->mman.buffer_funcs = NULL; @@ -1860,14 +2013,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_check_arguments(adev); - /* Registers mapping */ - /* TODO: block userspace mapping of io register */ spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); spin_lock_init(&adev->pcie_idx_lock); spin_lock_init(&adev->uvd_ctx_idx_lock); spin_lock_init(&adev->didt_idx_lock); spin_lock_init(&adev->gc_cac_idx_lock); + spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); @@ -1877,6 +2029,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->gtt_list); spin_lock_init(&adev->gtt_list_lock); + INIT_LIST_HEAD(&adev->ring_lru_list); + spin_lock_init(&adev->ring_lru_list_lock); + + INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); + + /* Registers mapping */ + /* TODO: block userspace mapping of io register */ if 
(adev->asic_type >= CHIP_BONAIRE) { adev->rmmio_base = pci_resource_start(adev->pdev, 5); adev->rmmio_size = pci_resource_len(adev->pdev, 5); @@ -1936,6 +2095,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_atombios_init(adev); if (r) { dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); goto failed; } @@ -1946,6 +2106,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_vpost_needed(adev)) { if (!adev->bios) { dev_err(adev->dev, "no vBIOS found\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); r = -EINVAL; goto failed; } @@ -1953,18 +2114,28 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_atom_asic_init(adev->mode_info.atom_context); if (r) { dev_err(adev->dev, "gpu post error!\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0); goto failed; } } else { DRM_INFO("GPU post is not needed\n"); } - if (!adev->is_atom_fw) { + if (adev->is_atom_fw) { + /* Initialize clocks */ + r = amdgpu_atomfirmware_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; + } + } else { /* Initialize clocks */ r = amdgpu_atombios_get_clock_info(adev); if (r) { dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); - return r; + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; } /* init i2c buses */ amdgpu_atombios_i2c_init(adev); @@ -1974,6 +2145,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_fence_driver_init(adev); if (r) { dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); goto failed; } @@ -1983,12 +2155,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_init(adev); if (r) { dev_err(adev->dev, "amdgpu_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); amdgpu_fini(adev); goto failed; } adev->accel_working = true; + amdgpu_vm_check_compute_bug(adev); + /* Initialize the buffer migration limit. 
*/ if (amdgpu_moverate >= 0) max_MBps = amdgpu_moverate; @@ -2000,6 +2175,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_ib_pool_init(adev); if (r) { dev_err(adev->dev, "IB initialization failed (%d).\n", r); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); goto failed; } @@ -2017,6 +2193,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("registering register debugfs failed (%d).\n", r); + r = amdgpu_debugfs_test_ib_ring_init(adev); + if (r) + DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r); + r = amdgpu_debugfs_firmware_init(adev); if (r) DRM_ERROR("registering firmware debugfs failed (%d).\n", r); @@ -2040,12 +2220,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_late_init(adev); if (r) { dev_err(adev->dev, "amdgpu_late_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); goto failed; } return 0; failed: + amdgpu_vf_error_trans_all(adev); if (runtime) vga_switcheroo_fini_domain_pm_ops(adev->dev); return r; @@ -2073,7 +2255,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); + if (adev->firmware.gpu_info_fw) { + release_firmware(adev->firmware.gpu_info_fw); + adev->firmware.gpu_info_fw = NULL; + } adev->accel_working = false; + cancel_delayed_work_sync(&adev->late_init_work); /* free i2c buses */ amdgpu_i2c_fini(adev); amdgpu_atombios_fini(adev); @@ -2133,6 +2320,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) } drm_modeset_unlock_all(dev); + amdgpu_amdkfd_suspend(adev); + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); @@ -2174,10 +2363,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_save(adev); - else - amdgpu_atombios_scratch_regs_save(adev); + amdgpu_atombios_scratch_regs_save(adev); pci_save_state(dev->pdev); if (suspend) { /* Shut down the device */ @@ -2226,10 +2412,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (r) goto unlock; } - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_restore(adev); - else - amdgpu_atombios_scratch_regs_restore(adev); + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ if (amdgpu_need_post(adev)) { @@ -2272,6 +2455,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } } } + r = amdgpu_amdkfd_resume(adev); + if (r) + return r; /* blat the mode back in */ if (fbcon) { @@ -2436,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, goto err; } - r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem); - if (r) { - DRM_ERROR("%p bind failed\n", bo->shadow); - goto err; - } - r = amdgpu_bo_restore_from_shadow(adev, ring, bo, NULL, fence, true); if (r) { @@ -2458,16 +2638,15 @@ err: * amdgpu_sriov_gpu_reset - reset the asic * * @adev: amdgpu device pointer - * @voluntary: if this reset is requested by guest. - * (true means by guest and false means by HYPERVISOR ) + * @job: which job trigger hang * * Attempt the reset the GPU if it has hung (all asics). * for SRIOV case. * Returns 0 for success or an error on failure. 
*/ -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) { - int i, r = 0; + int i, j, r = 0; int resched; struct amdgpu_bo *bo, *tmp; struct amdgpu_ring *ring; @@ -2480,22 +2659,39 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); - /* block scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - ring = adev->rings[i]; + /* we start from the ring trigger GPU hang */ + j = job ? job->ring->idx : 0; + /* block scheduler */ + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; if (!ring || !ring->sched.thread) continue; kthread_park(ring->sched.thread); + + if (job && j != i) + continue; + + /* here give the last chance to check if job removed from mirror-list + * since we already pay some time on kthread_park */ + if (job && list_empty(&job->base.node)) { + kthread_unpark(ring->sched.thread); + goto give_up_reset; + } + + if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit)) + amd_sched_job_kickout(&job->base); + + /* only do job_reset on the hang ring if @job not NULL */ amd_sched_hw_job_reset(&ring->sched); - } - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ - amdgpu_fence_driver_force_completion(adev); + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion_ring(ring); + } /* request to take full control of GPU before re-initialization */ - if (voluntary) + if (job) amdgpu_virt_reset_gpu(adev); else amdgpu_virt_request_full_gpu(adev, true); @@ -2545,20 +2741,28 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) } dma_fence_put(fence); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; if (!ring || !ring->sched.thread) continue; + if (job && j != i) { + kthread_unpark(ring->sched.thread); + continue; + } + amd_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); } drm_helper_resume_force_mode(adev->ddev); +give_up_reset: ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ? */ dev_info(adev->dev, "GPU reset failed\n"); + } else { + dev_info(adev->dev, "GPU reset successed!\n"); } adev->gfx.in_reset = false; @@ -2578,10 +2782,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) { int i, r; int resched; - bool need_full_reset; - - if (amdgpu_sriov_vf(adev)) - return amdgpu_sriov_gpu_reset(adev, true); + bool need_full_reset, vram_lost = false; if (!amdgpu_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. 
Did some blocks stall?\n"); @@ -2621,36 +2822,35 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) r = amdgpu_suspend(adev); retry: - /* Disable fb access */ - if (adev->mode_info.num_crtc) { - struct amdgpu_mode_mc_save save; - amdgpu_display_stop_mc_access(adev, &save); - amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); - } - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_save(adev); - else - amdgpu_atombios_scratch_regs_save(adev); + amdgpu_atombios_scratch_regs_save(adev); r = amdgpu_asic_reset(adev); - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_restore(adev); - else - amdgpu_atombios_scratch_regs_restore(adev); + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ amdgpu_atom_asic_init(adev->mode_info.atom_context); if (!r) { dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_resume(adev); + r = amdgpu_resume_phase1(adev); + if (r) + goto out; + vram_lost = amdgpu_check_vram_lost(adev); + if (vram_lost) { + DRM_ERROR("VRAM is lost!\n"); + atomic_inc(&adev->vram_lost_counter); + } + r = amdgpu_ttm_recover_gart(adev); + if (r) + goto out; + r = amdgpu_resume_phase2(adev); + if (r) + goto out; + if (vram_lost) + amdgpu_fill_reset_magic(adev); } } +out: if (!r) { amdgpu_irq_gpu_reset_resume_helper(adev); - if (need_full_reset && amdgpu_need_backup(adev)) { - r = amdgpu_ttm_recover_gart(adev); - if (r) - DRM_ERROR("gart recovery failed!!!\n"); - } r = amdgpu_ib_ring_tests(adev); if (r) { dev_err(adev->dev, "ib ring test failed (%d).\n", r); @@ -2702,6 +2902,7 @@ retry: } } else { dev_err(adev->dev, "asic resume failed (%d).\n", r); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { if (adev->rings[i] && adev->rings[i]->sched.thread) { kthread_unpark(adev->rings[i]->sched.thread); @@ -2715,8 +2916,13 @@ retry: if (r) { /* bad news, how to tell it to userspace ? 
*/ dev_info(adev->dev, "GPU reset failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); + } + else { + dev_info(adev->dev, "GPU reset successed!\n"); } + amdgpu_vf_error_trans_all(adev); return r; } @@ -3499,11 +3705,60 @@ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) } } +static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + int r = 0, i; + + /* hold on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_park(ring->sched.thread); + } + + seq_printf(m, "run ib test:\n"); + r = amdgpu_ib_ring_tests(adev); + if (r) + seq_printf(m, "ib ring tests failed (%d).\n", r); + else + seq_printf(m, "ib ring tests passed.\n"); + + /* go on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_unpark(ring->sched.thread); + } + + return 0; +} + +static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = { + {"amdgpu_test_ib", &amdgpu_debugfs_test_ib} +}; + +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) +{ + return amdgpu_debugfs_add_files(adev, + amdgpu_debugfs_test_ib_ring_list, 1); +} + int amdgpu_debugfs_init(struct drm_minor *minor) { return 0; } #else +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) +{ + return 0; +} static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index cdf2ab20166a..6ad243293a78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -482,7 +482,7 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - drm_gem_object_unreference_unlocked(amdgpu_fb->obj); + drm_gem_object_put_unlocked(amdgpu_fb->obj); drm_framebuffer_cleanup(fb); kfree(amdgpu_fb); } @@ -542,14 +542,14 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL); if (amdgpu_fb == NULL) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ERR_PTR(-ENOMEM); } ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 38e9b0d3659a..1cb52fd19060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ab6b0d0febab..0f16986ec5bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -39,7 +39,7 @@ #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> -#include "drm_crtc_helper.h" +#include <drm/drm_crtc_helper.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ 
-65,13 +65,19 @@ * - 3.13.0 - Add PRT support * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality * - 3.15.0 - Export more gpu info for gfx9 + * - 3.16.0 - Add reserved vmid support + * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. + * - 3.18.0 - Export gpu always on cu bitmap + * - 3.19.0 - Add support for UVD MJPEG decode */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 15 +#define KMS_DRIVER_MINOR 19 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; +int amdgpu_vis_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ +int amdgpu_gtt_size = -1; /* auto */ int amdgpu_moverate = -1; /* auto */ int amdgpu_benchmarking = 0; int amdgpu_testing = 0; @@ -89,10 +95,12 @@ unsigned amdgpu_ip_block_mask = 0xffffffff; int amdgpu_bapm = -1; int amdgpu_deep_color = 0; int amdgpu_vm_size = -1; +int amdgpu_vm_fragment_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; -int amdgpu_vram_page_split = 1024; +int amdgpu_vram_page_split = 512; +int amdgpu_vm_update_mode = -1; int amdgpu_exp_hw_support = 0; int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; @@ -102,6 +110,7 @@ unsigned amdgpu_pcie_gen_cap = 0; unsigned amdgpu_pcie_lane_cap = 0; unsigned amdgpu_cg_mask = 0xffffffff; unsigned amdgpu_pg_mask = 0xffffffff; +unsigned amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; unsigned amdgpu_pp_feature_mask = 0xffffffff; @@ -110,12 +119,20 @@ int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; int amdgpu_cntl_sb_buf_per_se = 0; int amdgpu_param_buf_per_se = 0; +int amdgpu_job_hang_limit = 0; +int amdgpu_lbpw = -1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); -MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)"); -module_param_named(gartsize, amdgpu_gart_size, int, 0600); +MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes"); +module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); + +MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)"); +module_param_named(gartsize, amdgpu_gart_size, uint, 0600); + +MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); +module_param_named(gttsize, amdgpu_gtt_size, int, 0600); MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)"); module_param_named(moverate, amdgpu_moverate, int, 0600); @@ -168,6 +185,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 0444); MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)"); module_param_named(vm_size, amdgpu_vm_size, int, 0444); +MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 
4 = 64K (default), Max 9 = 2M)"); +module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444); + MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); @@ -177,7 +197,10 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); -MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)"); +MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both"); +module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); + +MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)"); module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); @@ -190,7 +213,7 @@ MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); -module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, int, 0444); +module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))"); module_param_named(no_evict, amdgpu_no_evict, int, 0444); @@ -210,6 +233,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)"); module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444); +MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))"); +module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444); + MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)"); module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444); @@ -232,6 +258,38 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); +MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)"); +module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); + +MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)"); +module_param_named(lbpw, amdgpu_lbpw, int, 0444); + +#ifdef CONFIG_DRM_AMDGPU_SI + +#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) +int amdgpu_si_support = 0; +MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); +#else +int amdgpu_si_support = 1; +MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); +#endif + +module_param_named(si_support, amdgpu_si_support, int, 0444); +#endif + +#ifdef CONFIG_DRM_AMDGPU_CIK + +#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) +int amdgpu_cik_support = 0; +MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); +#else +int amdgpu_cik_support = 1; 
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); +#endif + +module_param_named(cik_support, amdgpu_cik_support, int, 0444); +#endif + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI @@ -461,6 +519,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + /* Raven */ + {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + {0, 0, 0} }; @@ -492,6 +553,7 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev) static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + struct drm_device *dev; unsigned long flags = ent->driver_data; int ret; @@ -514,7 +576,29 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) return ret; - return drm_get_pci_dev(pdev, ent, &kms_driver); + dev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) + goto err_free; + + dev->pdev = pdev; + + pci_set_drvdata(pdev, dev); + + ret = drm_dev_register(dev, ent->driver_data); + if (ret) + goto err_pci; + + return 0; + +err_pci: + pci_disable_device(pdev); +err_free: + drm_dev_unref(dev); + return ret; } static void @@ -522,7 +606,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - drm_put_dev(dev); + drm_dev_unregister(dev); + drm_dev_unref(dev); } static void @@ -716,22 +801,31 @@ static const struct file_operations amdgpu_driver_kms_fops = { #endif }; +static bool +amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) +{ + return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, + stime, etime, mode); +} + static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, - .set_busid = drm_pci_set_busid, .unload = amdgpu_driver_unload_kms, .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, .disable_vblank = amdgpu_disable_vblank_kms, - .get_vblank_timestamp = amdgpu_get_vblank_timestamp_kms, - .get_scanout_position = amdgpu_get_crtc_scanoutpos, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, + .get_scanout_position = amdgpu_get_crtc_scanout_position, #if defined(CONFIG_DEBUG_FS) .debugfs_init = amdgpu_debugfs_init, #endif @@ -745,7 +839,6 @@ static struct drm_driver kms_driver = { .gem_close_object = amdgpu_gem_object_close, .dumb_create = amdgpu_mode_dumb_create, .dumb_map_offset = amdgpu_mode_dumb_mmap, - .dumb_destroy = drm_gem_dumb_destroy, .fops = &amdgpu_driver_kms_fops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, @@ -808,7 +901,7 @@ static int __init amdgpu_init(void) driver->num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); /* let modprobe override vga console setting */ - return drm_pci_init(driver, pdriver); + return pci_register_driver(pdriver); error_sched: 
amdgpu_fence_slab_fini(); @@ -823,7 +916,7 @@ error_sync: static void __exit amdgpu_exit(void) { amdgpu_amdkfd_fini(); - drm_pci_exit(driver, pdriver); + pci_unregister_driver(pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amd_sched_fence_slab_fini(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index c0d8c6ff6380..9afa9c097e1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -118,7 +118,7 @@ static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj) amdgpu_bo_unpin(abo); amdgpu_bo_unreserve(abo); } - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); } static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, @@ -245,13 +245,12 @@ static int amdgpufb_create(struct drm_fb_helper *helper, drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); - info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &amdgpufb_ops; tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start; info->fix.smem_start = adev->mc.aper_base + tmp; info->fix.smem_len = amdgpu_bo_size(abo); - info->screen_base = abo->kptr; + info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); drm_fb_helper_fill_var(info, &rfbdev->helper, sizes->fb_width, sizes->fb_height); @@ -281,7 +280,7 @@ out: } if (fb && ret) { - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); drm_framebuffer_unregister_private(fb); drm_framebuffer_cleanup(fb); kfree(fb); @@ -312,31 +311,7 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb return 0; } -/** Sets the color ramps on behalf of fbcon */ -static void amdgpu_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, - u16 blue, int regno) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - amdgpu_crtc->lut_r[regno] = red >> 6; - amdgpu_crtc->lut_g[regno] = green >> 6; - amdgpu_crtc->lut_b[regno] = blue >> 6; -} - -/** Gets the color ramps on behalf of fbcon */ -static void amdgpu_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, int regno) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - *red = amdgpu_crtc->lut_r[regno] << 6; - *green = amdgpu_crtc->lut_g[regno] << 6; - *blue = amdgpu_crtc->lut_b[regno] << 6; -} - static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = { - .gamma_set = amdgpu_crtc_fb_gamma_set, - .gamma_get = amdgpu_crtc_fb_gamma_get, .fb_probe = amdgpufb_create, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7b60fb79c3a6..333bad749067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) } } +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring) +{ + if (ring) + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); +} + /* * Common fence implementation */ @@ -660,11 +666,17 @@ static const struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} }; + +static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = { + {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, +}; #endif int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) + if (amdgpu_sriov_vf(adev)) + return 
amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1); return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); #else return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 902e6015abca..f4370081f6e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -55,6 +55,7 @@ /* * Common GART table functions. */ + /** * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table * @@ -131,7 +132,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->gart.robj); + NULL, NULL, 0, &adev->gart.robj); if (r) { return r; } @@ -224,8 +225,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) * * Unbinds the requested pages from the gart page table and * replaces them with the dummy page (all asics). + * Returns 0 for success, -EINVAL for failure. */ -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; @@ -237,7 +239,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); - return; + return -EINVAL; } t = offset / AMDGPU_GPU_PAGE_SIZE; @@ -258,6 +260,42 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); + return 0; +} + +/** + * amdgpu_gart_map - map dma_addresses into GART entries + * + * @adev: amdgpu_device pointer + * @offset: offset into the GPU's gart aperture + * @pages: number of pages to bind + * @dma_addr: DMA addresses of pages + * + * Map the dma_addresses into GART entries (all asics). + * Returns 0 for success, -EINVAL for failure. 
+ */ +int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags, + void *dst) +{ + uint64_t page_base; + unsigned i, j, t; + + if (!adev->gart.ready) { + WARN(1, "trying to bind memory to uninitialized GART !\n"); + return -EINVAL; + } + + t = offset / AMDGPU_GPU_PAGE_SIZE; + + for (i = 0; i < pages; i++) { + page_base = dma_addr[i]; + for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { + amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags); + page_base += AMDGPU_GPU_PAGE_SIZE; + } + } + return 0; } /** @@ -277,31 +315,30 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint64_t flags) { - unsigned t; - unsigned p; - uint64_t page_base; - int i, j; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + unsigned i,t,p; +#endif + int r; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); return -EINVAL; } +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); - - for (i = 0; i < pages; i++, p++) { -#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + for (i = 0; i < pages; i++, p++) adev->gart.pages[p] = pagelist[i]; #endif - if (adev->gart.ptr) { - page_base = dma_addr[i]; - for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags); - page_base += AMDGPU_GPU_PAGE_SIZE; - } - } + + if (adev->gart.ptr) { + r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, + adev->gart.ptr); + if (r) + return r; } + mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); return 0; @@ -331,8 +368,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev) if (r) return r; /* Compute table size */ - adev->gart.num_cpu_pages = adev->mc.gtt_size / PAGE_SIZE; - adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE; + adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE; + adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE; DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h new file mode 100644 index 000000000000..afbe803b1a13 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -0,0 +1,76 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_GART_H__ +#define __AMDGPU_GART_H__ + +#include <linux/types.h> + +/* + * GART structures, functions & helpers + */ +struct amdgpu_device; +struct amdgpu_bo; +struct amdgpu_gart_funcs; + +#define AMDGPU_GPU_PAGE_SIZE 4096 +#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1) +#define AMDGPU_GPU_PAGE_SHIFT 12 +#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK) + +struct amdgpu_gart { + dma_addr_t table_addr; + struct amdgpu_bo *robj; + void *ptr; + unsigned num_gpu_pages; + unsigned num_cpu_pages; + unsigned table_size; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + struct page **pages; +#endif + bool ready; + + /* Asic default pte flags */ + uint64_t gart_pte_flags; + + const struct amdgpu_gart_funcs *gart_funcs; +}; + +int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); +void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); +int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); +void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); +int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); +void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); +int amdgpu_gart_init(struct amdgpu_device *adev); +void amdgpu_gart_fini(struct amdgpu_device *adev); +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, + int pages); +int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags, + void *dst); +int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, + int pages, struct page **pagelist, + dma_addr_t *dma_addr, uint64_t flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 94cb91cf93eb..7171968f261e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -49,7 +49,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, struct drm_gem_object **obj) { struct amdgpu_bo *robj; - unsigned long max_size; int r; *obj = NULL; @@ -58,20 +57,9 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, alignment = PAGE_SIZE; } - if (!(initial_domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA))) { - /* Maximum bo size is the unpinned gtt size since we use the gtt to - * handle vram to system pool migrations. - */ - max_size = adev->mc.gtt_size - adev->gart_pin_size; - if (size > max_size) { - DRM_DEBUG("Allocation size %ldMb bigger than %ldMb limit\n", - size >> 20, max_size >> 20); - return -ENOMEM; - } - } retry: r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, - flags, NULL, NULL, &robj); + flags, NULL, NULL, 0, &robj); if (r) { if (r != -ERESTARTSYS) { if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { @@ -103,7 +91,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev) spin_lock(&file->table_lock); idr_for_each_entry(&file->object_idr, gobj, handle) { WARN_ONCE(1, "And also active allocations!\n"); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); } idr_destroy(&file->object_idr); spin_unlock(&file->table_lock); @@ -219,16 +207,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, ttm_eu_backoff_reservation(&ticket, &list); } -static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r) -{ - if (r == -EDEADLK) { - r = amdgpu_gpu_reset(adev); - if (!r) - r = -EAGAIN; - } - return r; -} - /* * GEM ioctls. 
*/ @@ -247,22 +225,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_VRAM_CLEARED| - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - r = -EINVAL; - goto error_unlock; - } + AMDGPU_GEM_CREATE_VRAM_CLEARED)) + return -EINVAL; + /* reject invalid gem domains */ if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) { - r = -EINVAL; - goto error_unlock; - } + AMDGPU_GEM_DOMAIN_OA)) + return -EINVAL; /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | @@ -274,10 +247,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, size = size << AMDGPU_GWS_SHIFT; else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) size = size << AMDGPU_OA_SHIFT; - else { - r = -EINVAL; - goto error_unlock; - } + else + return -EINVAL; } size = roundup(size, PAGE_SIZE); @@ -286,21 +257,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, args->in.domain_flags, kernel, &gobj); if (r) - goto error_unlock; + return r; r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) - goto error_unlock; + return r; memset(args, 0, sizeof(*args)); args->out.handle = handle; return 0; - -error_unlock: - r = amdgpu_gem_handle_lockup(adev, r); - return r; } int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, @@ -334,10 +301,10 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_DOMAIN_CPU, 0, 0, &gobj); if (r) - goto handle_lockup; + return r; bo = gem_to_amdgpu_bo(gobj); - bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); if (r) @@ -372,9 +339,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) - goto handle_lockup; + return r; args->handle = handle; return 0; @@ -386,10 +353,7 @@ unlock_mmap_sem: up_read(¤t->mm->mmap_sem); release_object: - drm_gem_object_unreference_unlocked(gobj); - -handle_lockup: - r = amdgpu_gem_handle_lockup(adev, r); + drm_gem_object_put_unlocked(gobj); return r; } @@ -408,11 +372,11 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, robj = gem_to_amdgpu_bo(gobj); if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) || (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return -EPERM; } *offset_p = amdgpu_bo_mmap_offset(robj); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return 0; } @@ -456,7 +420,6 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct amdgpu_device *adev = dev->dev_private; union drm_amdgpu_gem_wait_idle *args = data; struct drm_gem_object *gobj; struct amdgpu_bo *robj; @@ -483,8 +446,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } else 
r = ret; - drm_gem_object_unreference_unlocked(gobj); - r = amdgpu_gem_handle_lockup(adev, r); + drm_gem_object_put_unlocked(gobj); return r; } @@ -527,7 +489,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, unreserve: amdgpu_bo_unreserve(robj); out: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -593,9 +555,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, uint64_t va_flags; int r = 0; - if (!adev->vm_manager.enabled) - return -ENOTTY; - if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { dev_err(&dev->pdev->dev, "va_address 0x%lX is in reserved area 0x%X\n", @@ -621,6 +580,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->operation); return -EINVAL; } + if ((args->operation == AMDGPU_VA_OP_MAP) || + (args->operation == AMDGPU_VA_OP_REPLACE)) { + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; + } INIT_LIST_HEAD(&list); if ((args->operation != AMDGPU_VA_OP_CLEAR) && @@ -657,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, args->map_size); if (r) goto error_backoff; @@ -677,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, args->map_size); if (r) goto error_backoff; @@ -698,7 +662,7 @@ error_backoff: ttm_eu_backoff_reservation(&ticket, &list); error_unref: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -723,11 +687,11 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, switch (args->op) { case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: { struct drm_amdgpu_gem_create_in info; - void __user *out = (void __user *)(uintptr_t)args->value; + void __user *out = u64_to_user_ptr(args->value); info.bo_size = robj->gem_base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; - info.domains = robj->prefered_domains; + info.domains = robj->preferred_domains; info.domain_flags = robj->flags; amdgpu_bo_unreserve(robj); if (copy_to_user(out, &info, sizeof(info))) @@ -745,10 +709,10 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, amdgpu_bo_unreserve(robj); break; } - robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | + robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU); - robj->allowed_domains = robj->prefered_domains; + robj->allowed_domains = robj->preferred_domains; if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; @@ -760,7 +724,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, } out: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -788,7 +752,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, r = drm_gem_handle_create(file_priv, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) { return r; } @@ -806,6 +770,7 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) unsigned domain; const char *placement; unsigned pin_count; + uint64_t offset; domain = 
amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); switch (domain) { @@ -820,9 +785,12 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) placement = " CPU"; break; } - seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx", - id, amdgpu_bo_size(bo), placement, - amdgpu_bo_gpu_offset(bo)); + seq_printf(m, "\t0x%08x: %12ld byte %s", + id, amdgpu_bo_size(bo), placement); + + offset = ACCESS_ONCE(bo->tbo.mem.start); + if (offset != AMDGPU_BO_INVALID_OFFSET) + seq_printf(m, " @ 0x%010Lx", offset); pin_count = ACCESS_ONCE(bo->pin_count); if (pin_count) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 19943356cca7..4f6c68fc1dd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -108,3 +108,210 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s p = next + 1; } } + +void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) +{ + int i, queue, pipe, mec; + + /* policy for amdgpu compute queue ownership */ + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + queue = i % adev->gfx.mec.num_queue_per_pipe; + pipe = (i / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + mec = (i / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + + /* we've run out of HW */ + if (mec >= adev->gfx.mec.num_mec) + break; + + /* FIXME: spreading the queues across pipes causes perf regressions */ + if (0) { + /* policy: amdgpu owns the first two queues of the first MEC */ + if (mec == 0 && queue < 2) + set_bit(i, adev->gfx.mec.queue_bitmap); + } else { + /* policy: amdgpu owns all queues in the first pipe */ + if (mec == 0 && pipe == 0) + set_bit(i, adev->gfx.mec.queue_bitmap); + } + } + + /* update the number of active compute rings */ + adev->gfx.num_compute_rings = + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* If you hit this case and edited the policy, you probably just + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; +} + +static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int queue_bit; + int mec, pipe, queue; + + queue_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + + while (queue_bit-- >= 0) { + if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) + continue; + + amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + + /* Using pipes 2/3 from MEC 2 seems cause problems */ + if (mec == 1 && pipe > 1) + continue; + + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + return 0; + } + + dev_err(adev->dev, "Failed to find a queue for KIQ\n"); + return -EINVAL; +} + +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + int r = 0; + + mutex_init(&kiq->ring_mutex); + + r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + if (r) + return r; + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + + r = amdgpu_gfx_kiq_acquire(adev, ring); + if (r) + return r; + + ring->eop_gpu_addr = kiq->eop_gpu_addr; + sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_init(adev, ring, 1024, + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); + if (r) + 
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + + return r; +} + +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_ring_fini(ring); +} + +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); +} + +int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, + unsigned hpd_size) +{ + int r; + u32 *hpd; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, + &kiq->eop_gpu_addr, (void **)&hpd); + if (r) { + dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); + return r; + } + + memset(hpd, 0, hpd_size); + + r = amdgpu_bo_reserve(kiq->eop_obj, true); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); + amdgpu_bo_kunmap(kiq->eop_obj); + amdgpu_bo_unreserve(kiq->eop_obj); + + return 0; +} + +/* create MQD for each compute queue */ +int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size) +{ + struct amdgpu_ring *ring = NULL; + int r, i; + + /* create MQD for KIQ */ + ring = &adev->gfx.kiq.ring; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + + /* create MQD for each KCQ */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + } + + return 0; +} + +void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + kfree(adev->gfx.mec.mqd_backup[i]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); + } + + ring = &adev->gfx.kiq.ring; + kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index e02044086445..1f279050d334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -30,4 +30,64 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg); void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh); +void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); + +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq); + +void amdgpu_gfx_kiq_free_ring(struct 
amdgpu_ring *ring, + struct amdgpu_irq_src *irq); + +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, + unsigned hpd_size); + +int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size); +void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); + +/** + * amdgpu_gfx_create_bitmask - create a bitmask + * + * @bit_width: length of the mask + * + * create a variable length bit mask. + * Returns the bitmask. + */ +static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) +{ + return (u32)((1ULL << bit_width) - 1); +} + +static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + int bit = 0; + + bit += mec * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + bit += pipe * adev->gfx.mec.num_queue_per_pipe; + bit += queue; + + return bit; +} + +static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.mec.num_queue_per_pipe; + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + +} +static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), + adev->gfx.mec.queue_bitmap); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index f7d22c44034d..0d15eb7d31d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -28,7 +28,7 @@ struct amdgpu_gtt_mgr { struct drm_mm mm; spinlock_t lock; - uint64_t available; + atomic64_t available; }; /** @@ -42,15 +42,19 @@ struct amdgpu_gtt_mgr { static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, unsigned long p_size) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr; + uint64_t start, size; mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) return -ENOMEM; - drm_mm_init(&mgr->mm, 0, p_size); + start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; + size = (adev->mc.gart_size >> PAGE_SHIFT) - start; + drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); - mgr->available = p_size; + atomic64_set(&mgr->available, p_size); man->priv = mgr; return 0; } @@ -81,6 +85,20 @@ static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) } /** + * amdgpu_gtt_mgr_is_allocated - Check if mem has address space + * + * @mem: the mem object to check + * + * Check if a mem object has already address space allocated. + */ +bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem) +{ + struct drm_mm_node *node = mem->mm_node; + + return (node->start != AMDGPU_BO_INVALID_OFFSET); +} + +/** * amdgpu_gtt_mgr_alloc - allocate new ranges * * @man: TTM memory type manager @@ -90,18 +108,19 @@ static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) * * Allocate the address space for a node. 
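The amdgpu_gfx_queue_to_bit()/amdgpu_gfx_bit_to_queue() inlines added above linearise a (mec, pipe, queue) triple into a single bit index and back. A quick user-space round-trip check of the same arithmetic; the per-ASIC counts below are hypothetical, not taken from any real chip:

#include <assert.h>
#include <stdio.h>

/* hypothetical topology used only for this check */
#define NUM_MEC            2
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

static int queue_to_bit(int mec, int pipe, int queue)
{
	return mec * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE +
	       pipe * NUM_QUEUE_PER_PIPE + queue;
}

static void bit_to_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % NUM_QUEUE_PER_PIPE;
	*pipe  = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	*mec   = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}

int main(void)
{
	for (int bit = 0; bit < NUM_MEC * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE; bit++) {
		int mec, pipe, queue;

		bit_to_queue(bit, &mec, &pipe, &queue);
		assert(queue_to_bit(mec, pipe, queue) == bit);
	}
	printf("queue<->bit mapping round-trips for all %d bits\n",
	       NUM_MEC * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE);
	return 0;
}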
*/ -int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, - struct ttm_buffer_object *tbo, - const struct ttm_place *place, - struct ttm_mem_reg *mem) +static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr = man->priv; struct drm_mm_node *node = mem->mm_node; enum drm_mm_insert_mode mode; unsigned long fpfn, lpfn; int r; - if (node->start != AMDGPU_BO_INVALID_OFFSET) + if (amdgpu_gtt_mgr_is_allocated(mem)) return 0; if (place) @@ -112,7 +131,7 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, if (place && place->lpfn) lpfn = place->lpfn; else - lpfn = man->size; + lpfn = adev->gart.num_cpu_pages; mode = DRM_MM_INSERT_BEST; if (place && place->flags & TTM_PL_FLAG_TOPDOWN) @@ -124,25 +143,12 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, fpfn, lpfn, mode); spin_unlock(&mgr->lock); - if (!r) { + if (!r) mem->start = node->start; - if (&tbo->mem == mem) - tbo->offset = (tbo->mem.start << PAGE_SHIFT) + - tbo->bdev->man[tbo->mem.mem_type].gpu_offset; - } return r; } -void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); - struct amdgpu_gtt_mgr *mgr = man->priv; - - seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", - man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20); - -} /** * amdgpu_gtt_mgr_new - allocate a new node * @@ -163,11 +169,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, int r; spin_lock(&mgr->lock); - if (mgr->available < mem->num_pages) { + if (atomic64_read(&mgr->available) < mem->num_pages) { spin_unlock(&mgr->lock); return 0; } - mgr->available -= mem->num_pages; + atomic64_sub(mem->num_pages, &mgr->available); spin_unlock(&mgr->lock); node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -194,9 +200,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, return 0; err_out: - spin_lock(&mgr->lock); - mgr->available += mem->num_pages; - spin_unlock(&mgr->lock); + atomic64_add(mem->num_pages, &mgr->available); return r; } @@ -223,30 +227,47 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, spin_lock(&mgr->lock); if (node->start != AMDGPU_BO_INVALID_OFFSET) drm_mm_remove_node(node); - mgr->available += mem->num_pages; spin_unlock(&mgr->lock); + atomic64_add(mem->num_pages, &mgr->available); kfree(node); mem->mm_node = NULL; } /** + * amdgpu_gtt_mgr_usage - return usage of GTT domain + * + * @man: TTM memory type manager + * + * Return how many bytes are used in the GTT domain + */ +uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + + return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE; +} + +/** * amdgpu_gtt_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. 
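The GTT manager hunks above turn the free-page counter into an atomic and derive usage as (man->size - available) pages. A compact sketch of that accounting pattern, assuming a 4 KiB page and made-up sizes (the kernel code additionally holds mgr->lock around the availability check):

#include <stdatomic.h>
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

static uint64_t man_size = 1024;            /* total GTT pages (made up) */
static _Atomic uint64_t available = 1024;   /* free pages */

static int gtt_alloc(uint64_t num_pages)
{
	if (atomic_load(&available) < num_pages)   /* guarded by a spinlock in the driver */
		return -1;
	atomic_fetch_sub(&available, num_pages);
	return 0;
}

static void gtt_free(uint64_t num_pages)
{
	atomic_fetch_add(&available, num_pages);
}

static uint64_t gtt_usage(void)
{
	return (man_size - atomic_load(&available)) * PAGE_SIZE;
}

int main(void)
{
	gtt_alloc(256);
	printf("usage: %llu bytes\n", (unsigned long long)gtt_usage()); /* 1048576 */
	gtt_free(256);
	printf("usage: %llu bytes\n", (unsigned long long)gtt_usage()); /* 0 */
	return 0;
}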
*/ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_gtt_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); + + drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", + man->size, (u64)atomic64_read(&mgr->available), + amdgpu_gtt_mgr_usage(man) >> 20); } const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6e4ae0d983c2..659997bfff30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; + struct dma_fence *tmp = NULL; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; @@ -129,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, unsigned i; int r = 0; + bool need_pipe_sync = false; if (num_ibs == 0) return -EINVAL; @@ -160,11 +162,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } - if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) - amdgpu_ring_emit_pipeline_sync(ring); - if (vm) { - r = amdgpu_vm_flush(ring, job); + if (ring->funcs->emit_pipeline_sync && job && + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || + amdgpu_vm_need_pipeline_sync(ring, job))) { + need_pipe_sync = true; + dma_fence_put(tmp); + } + + if (ring->funcs->insert_start) + ring->funcs->insert_start(ring); + + if (job) { + r = amdgpu_vm_flush(ring, job, need_pipe_sync); if (r) { amdgpu_ring_undo(ring); return r; @@ -188,8 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, status |= AMDGPU_HAVE_CTX_SWITCH; status |= job->preamble_status; - if (vm) - status |= AMDGPU_VM_DOMAIN; amdgpu_ring_emit_cntxcntl(ring, status); } @@ -208,6 +216,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = false; } + if (ring->funcs->emit_tmz) + amdgpu_ring_emit_tmz(ring, false); + if (ring->funcs->emit_hdp_invalidate #ifdef CONFIG_X86_64 && !(adev->flags & AMD_IS_APU) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index a3da1a122fc8..3de8e74e5b3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -62,8 +62,9 @@ enum amdgpu_ih_clientid AMDGPU_IH_CLIENTID_MP0 = 0x1e, AMDGPU_IH_CLIENTID_MP1 = 0x1f, - AMDGPU_IH_CLIENTID_MAX + AMDGPU_IH_CLIENTID_MAX, + AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD }; #define AMDGPU_IH_CLIENTID_LEGACY 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index a6b7e367a860..538e5f27d120 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -83,7 +83,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, reset_work); - amdgpu_gpu_reset(adev); + if (!amdgpu_sriov_vf(adev)) + amdgpu_gpu_reset(adev); } /* Disable *all* interrupts */ @@ -219,6 +220,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev) int r = 0; spin_lock_init(&adev->irq.lock); + + if 
(!adev->enable_virtual_display) + /* Disable vblank irqs aggressively for power-saving */ + adev->ddev->vblank_disable_immediate = true; + r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc); if (r) { return r; @@ -262,7 +268,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) { unsigned i, j; - drm_vblank_cleanup(adev->ddev); if (adev->irq.installed) { drm_irq_uninstall(adev->ddev); adev->irq.installed = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 7570f2439a11..4510627ae83e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) job->base.sched->name, atomic_read(&job->ring->fence_drv.last_seq), job->ring->fence_drv.sync_seq); - amdgpu_gpu_reset(job->adev); + + if (amdgpu_sriov_vf(job->adev)) + amdgpu_sriov_gpu_reset(job->adev, job); + else + amdgpu_gpu_reset(job->adev); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -57,9 +61,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; - (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); + amdgpu_sync_create(&(*job)->dep_sync); + amdgpu_sync_create(&(*job)->sched_sync); return 0; } @@ -76,6 +81,8 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]); if (r) kfree(*job); + else + (*job)->vm_pd_addr = adev->gart.table_addr; return r; } @@ -98,6 +105,8 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->dep_sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -107,6 +116,8 @@ void amdgpu_job_free(struct amdgpu_job *job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->dep_sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -138,11 +149,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; - struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); + struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync); + int r; + if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) { + r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); + if (r) + DRM_ERROR("Error adding fence to sync (%d)\n", r); + } + if (!fence) + fence = amdgpu_sync_get_fence(&job->sync); while (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; - int r; r = amdgpu_vm_grab_id(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -153,9 +171,6 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) fence = amdgpu_sync_get_fence(&job->sync); } - if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) - job->need_pipeline_sync = true; - return fence; } @@ -163,6 +178,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; struct amdgpu_job *job; + struct amdgpu_fpriv *fpriv = NULL; int r; if (!sched_job) { @@ -174,10 +190,16 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); - if (r) - DRM_ERROR("Error scheduling 
IBs (%d)\n", r); - + if (job->vm) + fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); + /* skip ib schedule when vram is lost */ + if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) + DRM_ERROR("Skip scheduling IBs!\n"); + else { + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); + if (r) + DRM_ERROR("Error scheduling IBs (%d)\n", r); + } /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 96c341670782..e16229000a98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -87,6 +87,41 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(dev->dev, + "SI support provided by radeon.\n"); + dev_info(dev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(dev->dev, + "CIK support provided by radeon.\n"); + dev_info(dev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; @@ -123,7 +158,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - amdgpu_amdkfd_load_interface(adev); amdgpu_amdkfd_device_probe(adev); amdgpu_amdkfd_device_init(adev); @@ -235,6 +269,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user *)(uintptr_t)info->return_pointer; @@ -247,6 +282,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: @@ -319,6 +356,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 1; break; + case AMDGPU_HW_IP_VCN_DEC: + type = AMD_IP_BLOCK_TYPE_VCN; + ring_mask = adev->vcn.ring_dec.ready ? 1 : 0; + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 16; + break; + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + for (i = 0; i < adev->vcn.num_enc_rings; i++) + ring_mask |= ((adev->vcn.ring_enc[i].ready ? 
1 : 0) << i); + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 1; + break; default: return -EINVAL; } @@ -361,6 +411,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; break; + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + break; default: return -EINVAL; } @@ -397,14 +451,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_NUM_EVICTIONS: ui64 = atomic64_read(&adev->num_evictions); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; + case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS: + ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); + return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_usage); + ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_vis_usage); + ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: - ui64 = atomic64_read(&adev->gtt_usage); + ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GDS_CONFIG: { struct drm_amdgpu_info_gds gds_info; @@ -427,7 +484,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file vram_gtt.vram_size -= adev->vram_pin_size; vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size; vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); - vram_gtt.gtt_size = adev->mc.gtt_size; + vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; + vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= adev->gart_pin_size; return copy_to_user(out, &vram_gtt, min((size_t)size, sizeof(vram_gtt))) ? 
-EFAULT : 0; @@ -439,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file mem.vram.total_heap_size = adev->mc.real_vram_size; mem.vram.usable_heap_size = adev->mc.real_vram_size - adev->vram_pin_size; - mem.vram.heap_usage = atomic64_read(&adev->vram_usage); + mem.vram.heap_usage = + amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -448,14 +507,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file adev->mc.visible_vram_size - (adev->vram_pin_size - adev->invisible_pin_size); mem.cpu_accessible_vram.heap_usage = - atomic64_read(&adev->vram_vis_usage); + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = mem.cpu_accessible_vram.usable_heap_size * 3 / 4; - mem.gtt.total_heap_size = adev->mc.gtt_size; - mem.gtt.usable_heap_size = - adev->mc.gtt_size - adev->gart_pin_size; - mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage); + mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size; + mem.gtt.total_heap_size *= PAGE_SIZE; + mem.gtt.usable_heap_size = mem.gtt.total_heap_size + - adev->gart_pin_size; + mem.gtt.heap_usage = + amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; return copy_to_user(out, &mem, @@ -513,8 +574,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; } else { - dev_info.max_engine_clock = adev->pm.default_sclk * 10; - dev_info.max_memory_clock = adev->pm.default_mclk * 10; + dev_info.max_engine_clock = adev->clock.default_sclk * 10; + dev_info.max_memory_clock = adev->clock.default_mclk * 10; } dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * @@ -529,13 +590,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); - dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) * - AMDGPU_GPU_PAGE_SIZE; + dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; - dev_info.cu_active_number = adev->gfx.cu_info.number; dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; dev_info.ce_ram_size = adev->gfx.ce_ram_size; + memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], + sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], sizeof(adev->gfx.cu_info.bitmap)); dev_info.vram_type = adev->mc.vram_type; @@ -730,6 +791,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv) +{ + return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); +} + /** * amdgpu_driver_open_kms - drm callback for open * @@ -757,7 +824,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } - r = amdgpu_vm_init(adev, &fpriv->vm); + r = amdgpu_vm_init(adev, 
&fpriv->vm, + AMDGPU_VM_CONTEXT_GFX); if (r) { kfree(fpriv); goto out_suspend; @@ -772,7 +840,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) } if (amdgpu_sriov_vf(adev)) { - r = amdgpu_map_static_csa(adev, &fpriv->vm); + r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); if (r) goto out_suspend; } @@ -782,6 +850,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); + fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); file_priv->driver_priv = fpriv; out_suspend: @@ -814,16 +883,18 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); - amdgpu_uvd_free_handles(adev, file_priv); - amdgpu_vce_free_handles(adev, file_priv); + if (adev->asic_type != CHIP_RAVEN) { + amdgpu_uvd_free_handles(adev, file_priv); + amdgpu_vce_free_handles(adev, file_priv); + } amdgpu_vm_bo_rmv(adev, fpriv->prt_va); if (amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); - amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va); - fpriv->vm.csa_bo_va = NULL; + amdgpu_vm_bo_rmv(adev, fpriv->csa_va); + fpriv->csa_va = NULL; amdgpu_bo_unreserve(adev->virt.csa_obj); } @@ -945,50 +1016,10 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) amdgpu_irq_put(adev, &adev->crtc_irq, idx); } -/** - * amdgpu_get_vblank_timestamp_kms - get vblank timestamp - * - * @dev: drm dev pointer - * @crtc: crtc to get the timestamp for - * @max_error: max error - * @vblank_time: time value - * @flags: flags passed to the driver - * - * Gets the timestamp on the requested crtc based on the - * scanout position. (all asics). - * Returns postive status flags on success, negative error on failure. 
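The vram_lost_counter scheme introduced above snapshots a global reset counter into each file private at open time; a later mismatch means VRAM contents were lost while that client held its context, so new submissions and mappings are refused with -ENODEV. A stripped-down sketch of the idea (names are illustrative, not the driver's API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned int vram_lost_counter;   /* bumped on a reset that loses VRAM */

struct client {
	unsigned int vram_lost_snapshot;         /* taken at open time */
};

static void client_open(struct client *c)
{
	c->vram_lost_snapshot = atomic_load(&vram_lost_counter);
}

static bool client_vram_lost(const struct client *c)
{
	return c->vram_lost_snapshot != atomic_load(&vram_lost_counter);
}

int main(void)
{
	struct client c;

	client_open(&c);
	printf("lost before reset: %d\n", client_vram_lost(&c));   /* 0 */

	atomic_fetch_add(&vram_lost_counter, 1);                   /* simulate a lossy GPU reset */
	printf("lost after reset:  %d\n", client_vram_lost(&c));   /* 1 */
	return 0;
}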
- */ -int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags) -{ - struct drm_crtc *crtc; - struct amdgpu_device *adev = dev->dev_private; - - if (pipe >= dev->num_crtcs) { - DRM_ERROR("Invalid crtc %u\n", pipe); - return -EINVAL; - } - - /* Get associated drm_crtc: */ - crtc = &adev->mode_info.crtcs[pipe]->base; - if (!crtc) { - /* This can occur on driver load if some component fails to - * initialize completely and driver is unloaded */ - DRM_ERROR("Uninitialized crtc %d\n", pipe); - return -EINVAL; - } - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, - vblank_time, flags, - &crtc->hwmode); -} - const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 38f739fb727b..3b0f2ec6eec7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -51,7 +51,7 @@ struct amdgpu_mn { /* objects protected by lock */ struct mutex lock; - struct rb_root objects; + struct rb_root_cached objects; }; struct amdgpu_mn_node { @@ -76,8 +76,8 @@ static void amdgpu_mn_destroy(struct work_struct *work) mutex_lock(&adev->mn_lock); mutex_lock(&rmn->lock); hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, - it.rb) { + rbtree_postorder_for_each_entry_safe(node, next_node, + &rmn->objects.rb_root, it.rb) { list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); @@ -147,36 +147,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_page - callback to notify about mm change - * - * @mn: our notifier - * @mn: the mm this callback is about - * @address: address of invalidate page - * - * Invalidation of a single page. Blocks for all BOs mapping it - * and unmap them by move them into system domain again. 
- */ -static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address) -{ - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); - struct interval_tree_node *it; - - mutex_lock(&rmn->lock); - - it = interval_tree_iter_first(&rmn->objects, address, address); - if (it) { - struct amdgpu_mn_node *node; - - node = container_of(it, struct amdgpu_mn_node, it); - amdgpu_mn_invalidate_node(node, address, address); - } - - mutex_unlock(&rmn->lock); -} - -/** * amdgpu_mn_invalidate_range_start - callback to notify about mm change * * @mn: our notifier @@ -215,7 +185,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, static const struct mmu_notifier_ops amdgpu_mn_ops = { .release = amdgpu_mn_release, - .invalidate_page = amdgpu_mn_invalidate_page, .invalidate_range_start = amdgpu_mn_invalidate_range_start, }; @@ -252,7 +221,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->mm = mm; rmn->mn.ops = &amdgpu_mn_ops; mutex_init(&rmn->lock); - rmn->objects = RB_ROOT; + rmn->objects = RB_ROOT_CACHED; r = __mmu_notifier_register(&rmn->mn, mm); if (r) @@ -359,7 +328,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) head = bo->mn_list.next; bo->mn = NULL; - list_del(&bo->mn_list); + list_del_init(&bo->mn_list); if (list_empty(head)) { struct amdgpu_mn_node *node; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index dbd10618ec20..2af2678ddaf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -257,15 +257,7 @@ struct amdgpu_audio { int num_pins; }; -struct amdgpu_mode_mc_save { - u32 vga_render_control; - u32 vga_hdp_control; - bool crtc_enabled[AMDGPU_MAX_CRTCS]; -}; - struct amdgpu_display_funcs { - /* vga render */ - void (*set_vga_render_state)(struct amdgpu_device *adev, bool render); /* display watermarks */ void (*bandwidth_update)(struct amdgpu_device *adev); /* get frame count */ @@ -300,10 +292,6 @@ struct amdgpu_display_funcs { uint16_t connector_object_id, struct amdgpu_hpd *hpd, struct amdgpu_router *router); - void (*stop_mc_access)(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save); - void (*resume_mc_access)(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save); }; struct amdgpu_mode_info { @@ -369,7 +357,6 @@ struct amdgpu_atom_ss { struct amdgpu_crtc { struct drm_crtc base; int crtc_id; - u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; uint32_t crtc_offset; @@ -534,6 +521,9 @@ struct amdgpu_framebuffer { ((em) == ATOM_ENCODER_MODE_DP_MST)) /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ +#define DRM_SCANOUTPOS_VALID (1 << 0) +#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) +#define DRM_SCANOUTPOS_ACCURATE (1 << 2) #define USE_REAL_VBLANKSTART (1 << 30) #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 365883d7948d..9e495da0bb03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -37,55 +37,6 @@ #include "amdgpu.h" #include "amdgpu_trace.h" - - -static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev, - struct ttm_mem_reg *mem) -{ - if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size) - return 0; - - return ((mem->start << PAGE_SHIFT) + mem->size) > - adev->mc.visible_vram_size ? 
- adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) : - mem->size; -} - -static void amdgpu_update_memory_usage(struct amdgpu_device *adev, - struct ttm_mem_reg *old_mem, - struct ttm_mem_reg *new_mem) -{ - u64 vis_size; - if (!adev) - return; - - if (new_mem) { - switch (new_mem->mem_type) { - case TTM_PL_TT: - atomic64_add(new_mem->size, &adev->gtt_usage); - break; - case TTM_PL_VRAM: - atomic64_add(new_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, new_mem); - atomic64_add(vis_size, &adev->vram_vis_usage); - break; - } - } - - if (old_mem) { - switch (old_mem->mem_type) { - case TTM_PL_TT: - atomic64_sub(old_mem->size, &adev->gtt_usage); - break; - case TTM_PL_VRAM: - atomic64_sub(old_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, old_mem); - atomic64_sub(vis_size, &adev->vram_vis_usage); - break; - } - } -} - static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); @@ -93,7 +44,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct amdgpu_bo, tbo); - amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL); + amdgpu_bo_kunmap(bo); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); @@ -140,7 +91,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; - places[c].lpfn = 0; + if (flags & AMDGPU_GEM_CREATE_SHADOW) + places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT; + else + places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_TT; if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) places[c].flags |= TTM_PL_FLAG_WC | @@ -219,7 +173,7 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, } /** - * amdgpu_bo_create_kernel - create BO for kernel use + * amdgpu_bo_create_reserved - create reserved BO for kernel use * * @adev: amdgpu device object * @size: size for the new BO @@ -229,24 +183,30 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, * @gpu_addr: GPU addr of the pinned BO * @cpu_addr: optional CPU address mapping * - * Allocates and pins a BO for kernel internal use. + * Allocates and pins a BO for kernel internal use, and returns it still + * reserved. * * Returns 0 on success, negative error code otherwise. 
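The removed amdgpu_get_vis_part_size() above clamped a buffer's extent against the CPU-visible VRAM window; per the amdgpu_kms.c hunks earlier, that usage figure now comes from amdgpu_vram_mgr_vis_usage() instead. A stand-alone version of the clamp that was deleted, with a made-up BAR size:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

/* bytes of [start_page << PAGE_SHIFT, +size) that fall inside CPU-visible VRAM */
static uint64_t vis_part_size(uint64_t start_page, uint64_t size, uint64_t visible_vram_size)
{
	uint64_t start = start_page << PAGE_SHIFT;

	if (start >= visible_vram_size)
		return 0;

	return (start + size) > visible_vram_size ? visible_vram_size - start : size;
}

int main(void)
{
	uint64_t visible = 256ULL << 20;   /* hypothetical 256 MiB visible window */

	/* BO straddling the end of the visible window */
	printf("%llu\n", (unsigned long long)
	       vis_part_size((visible - (1 << 20)) >> PAGE_SHIFT, 2 << 20, visible)); /* 1 MiB */
	/* BO entirely above it */
	printf("%llu\n", (unsigned long long)
	       vis_part_size(visible >> PAGE_SHIFT, 1 << 20, visible));               /* 0 */
	return 0;
}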
*/ -int amdgpu_bo_create_kernel(struct amdgpu_device *adev, - unsigned long size, int align, - u32 domain, struct amdgpu_bo **bo_ptr, - u64 *gpu_addr, void **cpu_addr) +int amdgpu_bo_create_reserved(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr) { + bool free = false; int r; - r = amdgpu_bo_create(adev, size, align, true, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, bo_ptr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); - return r; + if (!*bo_ptr) { + r = amdgpu_bo_create(adev, size, align, true, domain, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, 0, bo_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", + r); + return r; + } + free = true; } r = amdgpu_bo_reserve(*bo_ptr, false); @@ -269,20 +229,52 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, } } - amdgpu_bo_unreserve(*bo_ptr); - return 0; error_unreserve: amdgpu_bo_unreserve(*bo_ptr); error_free: - amdgpu_bo_unref(bo_ptr); + if (free) + amdgpu_bo_unref(bo_ptr); return r; } /** + * amdgpu_bo_create_kernel - create BO for kernel use + * + * @adev: amdgpu device object + * @size: size for the new BO + * @align: alignment for the new BO + * @domain: where to place it + * @bo_ptr: resulting BO + * @gpu_addr: GPU addr of the pinned BO + * @cpu_addr: optional CPU address mapping + * + * Allocates and pins a BO for kernel internal use. + * + * Returns 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_kernel(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr) +{ + int r; + + r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr, + gpu_addr, cpu_addr); + + if (r) + return r; + + amdgpu_bo_unreserve(*bo_ptr); + + return 0; +} + +/** * amdgpu_bo_free_kernel - free BO for kernel use * * @bo: amdgpu BO to free @@ -317,12 +309,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct sg_table *sg, struct ttm_placement *placement, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct amdgpu_bo *bo; enum ttm_bo_type type; unsigned long page_align; - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; size_t acc_size; int r; @@ -351,13 +344,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, } INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | + bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA); - bo->allowed_domains = bo->prefered_domains; + bo->allowed_domains = bo->preferred_domains; if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; @@ -398,8 +391,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); - amdgpu_cs_report_moved_bytes(adev, - atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved); + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + 
bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved); + else + amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); if (unlikely(r != 0)) return r; @@ -411,7 +410,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -426,6 +425,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, trace_amdgpu_bo_create(bo); + /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ + if (type == ttm_bo_type_device) + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + return 0; fail_unreserve: @@ -446,19 +449,19 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, if (bo->shadow) return 0; - bo->flags |= AMDGPU_GEM_CREATE_SHADOW; - memset(&placements, 0, - (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); - - amdgpu_ttm_placement_init(adev, &placement, - placements, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC); + memset(&placements, 0, sizeof(placements)); + amdgpu_ttm_placement_init(adev, &placement, placements, + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW); r = amdgpu_bo_create_restricted(adev, size, byte_align, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW, NULL, &placement, bo->tbo.resv, + 0, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); @@ -470,39 +473,41 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } +/* init_value will only take effect when flags contains + * AMDGPU_GEM_CREATE_VRAM_CLEARED. 
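The create path above now splits the bytes moved during placement into a visible-VRAM share, based on whether the BO ended up below mc.visible_vram_size. A simplified sketch of that classification (it omits the driver's extra check that visible VRAM is actually smaller than total VRAM; sizes are invented):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

struct moved_bytes { uint64_t total, visible; };

static struct moved_bytes classify_move(uint64_t bytes_moved, int in_vram,
					uint64_t start_page, uint64_t visible_vram_size)
{
	struct moved_bytes mb = { .total = bytes_moved, .visible = 0 };

	/* count against the visible-VRAM budget only if the BO sits below the window */
	if (in_vram && start_page < (visible_vram_size >> PAGE_SHIFT))
		mb.visible = bytes_moved;

	return mb;
}

int main(void)
{
	struct moved_bytes mb = classify_move(4 << 20, 1, 0, 256ULL << 20);

	printf("moved %llu bytes, %llu of them visible\n",
	       (unsigned long long)mb.total, (unsigned long long)mb.visible);
	return 0;
}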
+ */ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct ttm_placement placement = {0}; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; - memset(&placements, 0, - (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); - - amdgpu_ttm_placement_init(adev, &placement, - placements, domain, flags); + memset(&placements, 0, sizeof(placements)); + amdgpu_ttm_placement_init(adev, &placement, placements, + domain, parent_flags); - r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, - domain, flags, sg, &placement, - resv, bo_ptr); + r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain, + parent_flags, sg, &placement, resv, + init_value, bo_ptr); if (r) return r; - if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) { - if (!resv) { - r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL); - WARN_ON(r != 0); - } + if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { + if (!resv) + WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, + NULL)); r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); if (!resv) - ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock); + reservation_object_unlock((*bo_ptr)->tbo.resv); if (r) amdgpu_bo_unref(bo_ptr); @@ -535,7 +540,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr, amdgpu_bo_size(bo), resv, fence, - direct); + direct, false); if (!r) amdgpu_bo_fence(bo, *fence, true); @@ -551,7 +556,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) if (bo->pin_count) return 0; - domain = bo->prefered_domains; + domain = bo->preferred_domains; retry: amdgpu_ttm_placement_from_domain(bo, domain); @@ -588,7 +593,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr, amdgpu_bo_size(bo), resv, fence, - direct); + direct, false); if (!r) amdgpu_bo_fence(bo, *fence, true); @@ -598,16 +603,16 @@ err: int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) { - bool is_iomem; + void *kptr; long r; if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; - if (bo->kptr) { - if (ptr) { - *ptr = bo->kptr; - } + kptr = amdgpu_bo_kptr(bo); + if (kptr) { + if (ptr) + *ptr = kptr; return 0; } @@ -620,19 +625,23 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (r) return r; - bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); if (ptr) - *ptr = bo->kptr; + *ptr = amdgpu_bo_kptr(bo); return 0; } +void *amdgpu_bo_kptr(struct amdgpu_bo *bo) +{ + bool is_iomem; + + return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); +} + void amdgpu_bo_kunmap(struct amdgpu_bo *bo) { - if (bo->kptr == NULL) - return; - bo->kptr = NULL; - ttm_bo_kunmap(&bo->kmap); + if (bo->kmap.bo) + ttm_bo_kunmap(&bo->kmap); } struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) @@ -724,15 +733,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, dev_err(adev->dev, "%p pin failed\n", bo); goto error; } - r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); - if (unlikely(r)) { - dev_err(adev->dev, "%p bind failed\n", bo); - goto error; - } bo->pin_count = 1; - if (gpu_addr != NULL) + if (gpu_addr != NULL) { + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + if (unlikely(r)) { + dev_err(adev->dev, "%p bind failed\n", bo); + goto error; + } *gpu_addr = 
amdgpu_bo_gpu_offset(bo); + } if (domain == AMDGPU_GEM_DOMAIN_VRAM) { adev->vram_pin_size += amdgpu_bo_size(bo); if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) @@ -921,6 +931,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, abo = container_of(bo, struct amdgpu_bo, tbo); amdgpu_vm_bo_invalidate(adev, abo); + amdgpu_bo_kunmap(abo); + /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); @@ -930,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, return; /* move_notify is called before move happens */ - amdgpu_update_memory_usage(adev, &bo->mem, new_mem); - trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } @@ -939,19 +949,22 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - unsigned long offset, size, lpfn; - int i, r; + unsigned long offset, size; + int r; if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return 0; abo = container_of(bo, struct amdgpu_bo, tbo); + + /* Remember that this BO was accessed by the CPU */ + abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (bo->mem.mem_type != TTM_PL_VRAM) return 0; size = bo->mem.num_pages << PAGE_SHIFT; offset = bo->mem.start << PAGE_SHIFT; - /* TODO: figure out how to map scattered VRAM to the CPU */ if ((offset + size) <= adev->mc.visible_vram_size) return 0; @@ -960,26 +973,22 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) return -EINVAL; /* hurrah the memory is not visible ! */ - amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); - lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; - for (i = 0; i < abo->placement.num_placement; i++) { - /* Force into visible VRAM */ - if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) && - (!abo->placements[i].lpfn || - abo->placements[i].lpfn > lpfn)) - abo->placements[i].lpfn = lpfn; - } + atomic64_inc(&adev->num_vram_cpu_page_faults); + amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT); + + /* Avoid costly evictions; only set GTT as a busy placement */ + abo->placement.num_busy_placement = 1; + abo->placement.busy_placement = &abo->placements[1]; + r = ttm_bo_validate(bo, &abo->placement, false, false); - if (unlikely(r == -ENOMEM)) { - amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); - return ttm_bo_validate(bo, &abo->placement, false, false); - } else if (unlikely(r != 0)) { + if (unlikely(r != 0)) return r; - } offset = bo->mem.start << PAGE_SHIFT; /* this should never happen */ - if ((offset + size) > adev->mc.visible_vram_size) + if (bo->mem.mem_type == TTM_PL_VRAM && + (offset + size) > adev->mc.visible_vram_size) return -EINVAL; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 382485115b06..a288fa6d72c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -33,6 +33,61 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX +/* bo virtual addresses in a vm */ +struct amdgpu_bo_va_mapping { + struct list_head list; + struct rb_node rb; + uint64_t start; + uint64_t last; + uint64_t __subtree_last; + uint64_t offset; + uint64_t flags; +}; + +/* User space allocated BO in a VM */ +struct amdgpu_bo_va { + struct amdgpu_vm_bo_base base; + + /* protected by bo being reserved */ + struct dma_fence *last_pt_update; + unsigned ref_count; + + /* mappings for this bo_va */ + struct list_head invalids; + struct list_head valids; +}; + +struct amdgpu_bo { 
+ /* Protected by tbo.reserved */ + u32 preferred_domains; + u32 allowed_domains; + struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + struct ttm_placement placement; + struct ttm_buffer_object tbo; + struct ttm_bo_kmap_obj kmap; + u64 flags; + unsigned pin_count; + u64 tiling_flags; + u64 metadata_flags; + void *metadata; + u32 metadata_size; + unsigned prime_shared_count; + /* list of all virtual address to which this bo is associated to */ + struct list_head va; + /* Constant after initialization */ + struct drm_gem_object gem_base; + struct amdgpu_bo *parent; + struct amdgpu_bo *shadow; + + struct ttm_bo_kmap_obj dma_buf_vmap; + struct amdgpu_mn *mn; + + union { + struct list_head mn_list; + struct list_head shadow_list; + }; +}; + /** * amdgpu_mem_type_to_domain - return domain corresponding to mem_type * @mem_type: ttm memory type @@ -120,7 +175,11 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) */ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) { - return bo->tbo.mem.mem_type != TTM_PL_SYSTEM; + switch (bo->tbo.mem.mem_type) { + case TTM_PL_TT: return amdgpu_ttm_is_bound(bo->tbo.ttm); + case TTM_PL_VRAM: return true; + default: return false; + } } int amdgpu_bo_create(struct amdgpu_device *adev, @@ -128,6 +187,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_restricted(struct amdgpu_device *adev, unsigned long size, int byte_align, @@ -135,7 +195,12 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct sg_table *sg, struct ttm_placement *placement, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create_reserved(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, @@ -143,6 +208,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); +void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index c19c4d138751..f21a7716b90e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -30,6 +30,7 @@ struct cg_flag_name const char *name; }; +void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); int amdgpu_pm_sysfs_init(struct amdgpu_device *adev); void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev); void amdgpu_pm_print_power_states(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index f5ae871aa11c..b7e1c026c0c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -72,6 +72,7 @@ static int amdgpu_pp_early_init(void *handle) case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_VEGA10: + case CHIP_RAVEN: adev->pp_enabled = true; if (amdgpu_create_pp_handle(adev)) return -EINVAL; @@ -187,6 +188,9 @@ static int amdgpu_pp_hw_fini(void *handle) int ret = 0; struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->pp_enabled && adev->pm.dpm_enabled) + amdgpu_pm_sysfs_fini(adev); + if (adev->powerplay.ip_funcs->hw_fini) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); @@ -205,10 +209,9 @@ static void amdgpu_pp_late_fini(void *handle) adev->powerplay.ip_funcs->late_fini( adev->powerplay.pp_handle); - if (adev->pp_enabled && adev->pm.dpm_enabled) - amdgpu_pm_sysfs_fini(adev); - amd_powerplay_destroy(adev->powerplay.pp_handle); + if (adev->pp_enabled) + amd_powerplay_destroy(adev->powerplay.pp_handle); } static int amdgpu_pp_suspend(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 6bdc866570ab..5b3f92891f89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -69,7 +69,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ww_mutex_lock(&resv->lock, NULL); ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo); + AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo); ww_mutex_unlock(&resv->lock); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index ac5e92e5d59d..8c2204c7b384 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -24,12 +24,13 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" #include "soc15_common.h" #include "psp_v3_1.h" +#include "psp_v10_0.h" static void psp_set_funcs(struct amdgpu_device *adev); @@ -61,6 +62,17 @@ static int psp_sw_init(void *handle) psp->compare_sram_data = psp_v3_1_compare_sram_data; psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; break; + case CHIP_RAVEN: +#if 0 + psp->init_microcode = psp_v10_0_init_microcode; +#endif + psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; + psp->ring_init = psp_v10_0_ring_init; + psp->ring_create = psp_v10_0_ring_create; + psp->ring_destroy = psp_v10_0_ring_destroy; + psp->cmd_submit = psp_v10_0_cmd_submit; + psp->compare_sram_data = psp_v10_0_compare_sram_data; + break; default: return -EINVAL; } @@ -88,9 +100,8 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, int i; struct amdgpu_device *adev = psp->adev; - val = RREG32(reg_index); - for (i = 0; i < adev->usec_timeout; i++) { + val = RREG32(reg_index); if (check_changed) { if (val != reg_val) return 0; @@ -111,33 +122,18 @@ psp_cmd_submit_buf(struct psp_context *psp, int index) { int ret; - struct amdgpu_bo *cmd_buf_bo; - uint64_t cmd_buf_mc_addr; - struct psp_gfx_cmd_resp *cmd_buf_mem; - struct amdgpu_device *adev = psp->adev; - - ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &cmd_buf_bo, &cmd_buf_mc_addr, - (void **)&cmd_buf_mem); - if (ret) - return ret; - memset(cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); + memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); - memcpy(cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); + memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); - ret = psp_cmd_submit(psp, ucode, cmd_buf_mc_addr, + ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, fence_mc_addr, index); while (*((unsigned int *)psp->fence_buf) != index) { msleep(1); } - amdgpu_bo_free_kernel(&cmd_buf_bo, - &cmd_buf_mc_addr, - (void **)&cmd_buf_mem); - return ret; } @@ -145,8 +141,8 @@ static void psp_prep_tmr_cmd_buf(struct 
psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, uint32_t size) { cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; - cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc; - cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32); + cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); + cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; } @@ -230,6 +226,13 @@ static int psp_asd_load(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; + /* If PSP version doesn't match ASD version, asd loading will be failed. + * add workaround to bypass it for sriov now. + * TODO: add version check to make it common + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -319,14 +322,11 @@ static int psp_load_fw(struct amdgpu_device *adev) { int ret; struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) + psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!psp->cmd) return -ENOMEM; - psp->cmd = cmd; - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, @@ -341,13 +341,20 @@ static int psp_load_fw(struct amdgpu_device *adev) &psp->fence_buf_mc_addr, &psp->fence_buf); if (ret) + goto failed_mem2; + + ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); + if (ret) goto failed_mem1; memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); ret = psp_ring_init(psp, PSP_RING_TYPE__KM); if (ret) - goto failed_mem1; + goto failed_mem; ret = psp_tmr_init(psp); if (ret) @@ -365,18 +372,21 @@ static int psp_load_fw(struct amdgpu_device *adev) if (ret) goto failed_mem; - kfree(cmd); - return 0; failed_mem: + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, + &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); +failed_mem1: amdgpu_bo_free_kernel(&psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); -failed_mem1: +failed_mem2: amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); failed: - kfree(cmd); + kfree(psp->cmd); + psp->cmd = NULL; return ret; } @@ -425,16 +435,18 @@ static int psp_hw_fini(void *handle) psp_ring_destroy(psp, PSP_RING_TYPE__KM); - if (psp->tmr_buf) - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); - - if (psp->fw_pri_buf) - amdgpu_bo_free_kernel(&psp->fw_pri_bo, - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr, + &psp->asd_shared_buf); + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); - if (psp->fence_buf_bo) - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); + kfree(psp->cmd); + psp->cmd = NULL; return 0; } @@ -542,3 +554,12 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block = .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v10_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 0301e4e0b297..538fa9dbfb21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -108,6 +108,11 @@ struct psp_context struct amdgpu_bo *fence_buf_bo; uint64_t fence_buf_mc_addr; void *fence_buf; + + /* cmd buffer */ + struct amdgpu_bo *cmd_buf_bo; + uint64_t cmd_buf_mc_addr; + struct psp_gfx_cmd_resp *cmd_buf_mem; }; struct amdgpu_psp_funcs { @@ -138,4 +143,6 @@ extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); +extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c new file mode 100644 index 000000000000..befc09b68543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -0,0 +1,299 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Andres Rodriguez + */ + +#include "amdgpu.h" +#include "amdgpu_ring.h" + +static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper, + int hw_ip) +{ + if (!mapper) + return -EINVAL; + + if (hw_ip > AMDGPU_MAX_IP_NUM) + return -EINVAL; + + mapper->hw_ip = hw_ip; + mutex_init(&mapper->lock); + + memset(mapper->queue_map, 0, sizeof(mapper->queue_map)); + + return 0; +} + +static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper, + int ring) +{ + return mapper->queue_map[ring]; +} + +static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, + int ring, struct amdgpu_ring *pring) +{ + if (WARN_ON(mapper->queue_map[ring])) { + DRM_ERROR("Un-expected ring re-map\n"); + return -EINVAL; + } + + mapper->queue_map[ring] = pring; + + return 0; +} + +static int amdgpu_identity_map(struct amdgpu_device *adev, + struct amdgpu_queue_mapper *mapper, + int ring, + struct amdgpu_ring **out_ring) +{ + switch (mapper->hw_ip) { + case AMDGPU_HW_IP_GFX: + *out_ring = &adev->gfx.gfx_ring[ring]; + break; + case AMDGPU_HW_IP_COMPUTE: + *out_ring = &adev->gfx.compute_ring[ring]; + break; + case AMDGPU_HW_IP_DMA: + *out_ring = &adev->sdma.instance[ring].ring; + break; + case AMDGPU_HW_IP_UVD: + *out_ring = &adev->uvd.ring; + break; + case AMDGPU_HW_IP_VCE: + *out_ring = &adev->vce.ring[ring]; + break; + case AMDGPU_HW_IP_UVD_ENC: + *out_ring = &adev->uvd.ring_enc[ring]; + break; + case AMDGPU_HW_IP_VCN_DEC: + *out_ring = &adev->vcn.ring_dec; + break; + case AMDGPU_HW_IP_VCN_ENC: + *out_ring = &adev->vcn.ring_enc[ring]; + break; + default: + *out_ring = NULL; + DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); + return -EINVAL; + } + + return amdgpu_update_cached_map(mapper, ring, *out_ring); +} + +static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) +{ + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + return AMDGPU_RING_TYPE_GFX; + case AMDGPU_HW_IP_COMPUTE: + return AMDGPU_RING_TYPE_COMPUTE; + case AMDGPU_HW_IP_DMA: + return AMDGPU_RING_TYPE_SDMA; + case AMDGPU_HW_IP_UVD: + return AMDGPU_RING_TYPE_UVD; + case AMDGPU_HW_IP_VCE: + return AMDGPU_RING_TYPE_VCE; + default: + DRM_ERROR("Invalid HW IP specified %d\n", hw_ip); + return -1; + } +} + +static int amdgpu_lru_map(struct amdgpu_device *adev, + struct amdgpu_queue_mapper *mapper, + int user_ring, + struct amdgpu_ring **out_ring) +{ + int r, i, j; + int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip); + int ring_blacklist[AMDGPU_MAX_RINGS]; + struct amdgpu_ring *ring; + + /* 0 is a valid ring index, so initialize to -1 */ + memset(ring_blacklist, 0xff, sizeof(ring_blacklist)); + + for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) { + ring = mapper->queue_map[i]; + if (ring) + ring_blacklist[j++] = ring->idx; + } + + r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist, + j, out_ring); + if (r) + return r; + + return amdgpu_update_cached_map(mapper, user_ring, *out_ring); +} + +/** + * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * + * Initialize the the selected @mgr (all asics). + * + * Returns 0 on success, error on failure. 
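/*
 * Aside: the queue manager added in this new file decouples the ring index
 * that userspace passes in from the kernel ring that executes the job.
 * GFX, UVD, VCE, UVD-ENC and VCN queues keep an identity mapping, while
 * compute and SDMA queues are handed out in least-recently-used order,
 * with rings already mapped by this manager passed as a blacklist.  The
 * result is cached in mapper->queue_map, so the policy runs only once per
 * user ring.  The sketch below is illustrative only and assumes nothing
 * beyond the amdgpu_queue_mgr_map() signature defined further down in
 * this file; the wrapper and debug print are invented for the example.
 */
static int example_resolve_user_ring(struct amdgpu_device *adev,
				     struct amdgpu_queue_mgr *mgr,
				     int hw_ip, int instance, int user_ring)
{
	struct amdgpu_ring *ring;
	int r;

	/* The first call picks a kernel ring (identity or LRU, depending on
	 * hw_ip) and caches it; later calls return the cached ring. */
	r = amdgpu_queue_mgr_map(adev, mgr, hw_ip, instance, user_ring, &ring);
	if (r)
		return r;

	DRM_DEBUG("user ring %d of hw_ip %d mapped to kernel ring %u\n",
		  user_ring, hw_ip, ring->idx);
	return 0;
}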
+ */ +int amdgpu_queue_mgr_init(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr) +{ + int i, r; + + if (!adev || !mgr) + return -EINVAL; + + memset(mgr, 0, sizeof(*mgr)); + + for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) { + r = amdgpu_queue_mapper_init(&mgr->mapper[i], i); + if (r) + return r; + } + + return 0; +} + +/** + * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * + * De-initialize the the selected @mgr (all asics). + * + * Returns 0 on success, error on failure. + */ +int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr) +{ + return 0; +} + +/** + * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * @hw_ip: HW IP enum + * @instance: HW instance + * @ring: user ring id + * @our_ring: pointer to mapped amdgpu_ring + * + * Map a userspace ring id to an appropriate kernel ring. Different + * policies are configurable at a HW IP level. + * + * Returns 0 on success, error on failure. + */ +int amdgpu_queue_mgr_map(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr, + int hw_ip, int instance, int ring, + struct amdgpu_ring **out_ring) +{ + int r, ip_num_rings; + struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip]; + + if (!adev || !mgr || !out_ring) + return -EINVAL; + + if (hw_ip >= AMDGPU_MAX_IP_NUM) + return -EINVAL; + + if (ring >= AMDGPU_MAX_RINGS) + return -EINVAL; + + /* Right now all IPs have only one instance - multiple rings. */ + if (instance != 0) { + DRM_ERROR("invalid ip instance: %d\n", instance); + return -EINVAL; + } + + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + ip_num_rings = adev->gfx.num_gfx_rings; + break; + case AMDGPU_HW_IP_COMPUTE: + ip_num_rings = adev->gfx.num_compute_rings; + break; + case AMDGPU_HW_IP_DMA: + ip_num_rings = adev->sdma.num_instances; + break; + case AMDGPU_HW_IP_UVD: + ip_num_rings = 1; + break; + case AMDGPU_HW_IP_VCE: + ip_num_rings = adev->vce.num_rings; + break; + case AMDGPU_HW_IP_UVD_ENC: + ip_num_rings = adev->uvd.num_enc_rings; + break; + case AMDGPU_HW_IP_VCN_DEC: + ip_num_rings = 1; + break; + case AMDGPU_HW_IP_VCN_ENC: + ip_num_rings = adev->vcn.num_enc_rings; + break; + default: + DRM_ERROR("unknown ip type: %d\n", hw_ip); + return -EINVAL; + } + + if (ring >= ip_num_rings) { + DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n", + ring, ip_num_rings, hw_ip); + return -EINVAL; + } + + mutex_lock(&mapper->lock); + + *out_ring = amdgpu_get_cached_map(mapper, ring); + if (*out_ring) { + /* cache hit */ + r = 0; + goto out_unlock; + } + + switch (mapper->hw_ip) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_UVD: + case AMDGPU_HW_IP_VCE: + case AMDGPU_HW_IP_UVD_ENC: + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + r = amdgpu_identity_map(adev, mapper, ring, out_ring); + break; + case AMDGPU_HW_IP_DMA: + case AMDGPU_HW_IP_COMPUTE: + r = amdgpu_lru_map(adev, mapper, ring, out_ring); + break; + default: + *out_ring = NULL; + r = -EINVAL; + DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); + } + +out_unlock: + mutex_unlock(&mapper->lock); + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6a85db0c0bc3..5ce65280b396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct 
amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); + + amdgpu_ring_lru_touch(ring->adev, ring); } /** @@ -168,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned irq_type) { int r; + int sched_hw_submission = amdgpu_sched_hw_submission; + + /* Set the hw submission limit higher for KIQ because + * it's used for a number of gfx/compute tasks by both + * KFD and KGD which may have outstanding fences and + * it doesn't really use the gpu scheduler anyway; + * KIQ tasks get submitted directly to the ring. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + sched_hw_submission = max(sched_hw_submission, 256); if (ring->adev == NULL) { if (adev->num_rings >= AMDGPU_MAX_RINGS) @@ -176,38 +188,21 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->adev = adev; ring->idx = adev->num_rings++; adev->rings[ring->idx] = ring; - r = amdgpu_fence_driver_init_ring(ring, - amdgpu_sched_hw_submission); + r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission); if (r) return r; } - if (ring->funcs->support_64bit_ptrs) { - r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } - - r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } - - } else { - r = amdgpu_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } - - r = amdgpu_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } + r = amdgpu_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; } r = amdgpu_wb_get(adev, &ring->fence_offs); @@ -232,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - ring->ring_size = roundup_pow_of_two(max_dw * 4 * - amdgpu_sched_hw_submission); + ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission); ring->buf_mask = (ring->ring_size / 4) - 1; ring->ptr_mask = ring->funcs->support_64bit_ptrs ? 
@@ -253,10 +247,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; + INIT_LIST_HEAD(&ring->lru_list); + amdgpu_ring_lru_touch(adev, ring); if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } + return 0; } @@ -272,18 +269,15 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { ring->ready = false; - if (ring->funcs->support_64bit_ptrs) { - amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free_64bit(ring->adev, ring->fence_offs); - amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs); - amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs); - } else { - amdgpu_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free(ring->adev, ring->fence_offs); - amdgpu_wb_free(ring->adev, ring->rptr_offs); - amdgpu_wb_free(ring->adev, ring->wptr_offs); - } + /* Not to finish a ring which is not initialized */ + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) + return; + amdgpu_wb_free(ring->adev, ring->rptr_offs); + amdgpu_wb_free(ring->adev, ring->wptr_offs); + + amdgpu_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_wb_free(ring->adev, ring->fence_offs); amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, @@ -294,6 +288,84 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->adev->rings[ring->idx] = NULL; } +static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + /* list_move_tail handles the case where ring isn't part of the list */ + list_move_tail(&ring->lru_list, &adev->ring_lru_list); +} + +static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring, + int *blacklist, int num_blacklist) +{ + int i; + + for (i = 0; i < num_blacklist; i++) { + if (ring->idx == blacklist[i]) + return true; + } + + return false; +} + +/** + * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block + * + * @adev: amdgpu_device pointer + * @type: amdgpu_ring_type enum + * @blacklist: blacklisted ring ids array + * @num_blacklist: number of entries in @blacklist + * @ring: output ring + * + * Retrieve the amdgpu_ring structure for the least recently used ring of + * a specific IP block (all asics). + * Returns 0 on success, error on failure. 
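/*
 * Aside: the LRU list and ring selection cooperate in a simple way.
 * amdgpu_ring_commit() touches a ring on every submission, moving it to
 * the tail of adev->ring_lru_list, and amdgpu_ring_lru_get() scans from
 * the head, so the ring that has gone longest without a submission is
 * chosen.  Illustrative sketch only, assuming just the
 * amdgpu_ring_lru_get() interface defined below; the blacklist values
 * are invented for the example.
 */
static int example_pick_lru_compute_ring(struct amdgpu_device *adev,
					 struct amdgpu_ring **out_ring)
{
	/* ring->idx values to skip, e.g. rings this caller already uses */
	int blacklist[2] = { 2, 3 };

	return amdgpu_ring_lru_get(adev, AMDGPU_RING_TYPE_COMPUTE,
				   blacklist, 2, out_ring);
}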
+ */ +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, + int num_blacklist, struct amdgpu_ring **ring) +{ + struct amdgpu_ring *entry; + + /* List is sorted in LRU order, find first entry corresponding + * to the desired HW IP */ + *ring = NULL; + spin_lock(&adev->ring_lru_list_lock); + list_for_each_entry(entry, &adev->ring_lru_list, lru_list) { + if (entry->funcs->type != type) + continue; + + if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist)) + continue; + + *ring = entry; + amdgpu_ring_lru_touch_locked(adev, *ring); + break; + } + spin_unlock(&adev->ring_lru_list_lock); + + if (!*ring) { + DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type); + return -EINVAL; + } + + return 0; +} + +/** + * amdgpu_ring_lru_touch - mark a ring as recently being used + * + * @adev: amdgpu_device pointer + * @ring: ring to touch + * + * Move @ring to the tail of the lru list + */ +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + spin_lock(&adev->ring_lru_list_lock); + amdgpu_ring_lru_touch_locked(adev, ring); + spin_unlock(&adev->ring_lru_list_lock); +} + /* * Debugfs info */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 944443c5b90a..322d25299a00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -47,7 +47,9 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_UVD, AMDGPU_RING_TYPE_VCE, AMDGPU_RING_TYPE_KIQ, - AMDGPU_RING_TYPE_UVD_ENC + AMDGPU_RING_TYPE_UVD_ENC, + AMDGPU_RING_TYPE_VCN_DEC, + AMDGPU_RING_TYPE_VCN_ENC }; struct amdgpu_device; @@ -76,6 +78,7 @@ struct amdgpu_fence_driver { int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission); @@ -130,6 +133,7 @@ struct amdgpu_ring_funcs { int (*test_ib)(struct amdgpu_ring *ring, long timeout); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); + void (*insert_start)(struct amdgpu_ring *ring); void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); @@ -142,6 +146,7 @@ struct amdgpu_ring_funcs { void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + void (*emit_tmz)(struct amdgpu_ring *ring, bool start); }; struct amdgpu_ring { @@ -149,6 +154,7 @@ struct amdgpu_ring { const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; struct amd_gpu_scheduler sched; + struct list_head lru_list; struct amdgpu_bo *ring_obj; volatile uint32_t *ring; @@ -180,6 +186,7 @@ struct amdgpu_ring { u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; + bool has_compute_vm_bug; #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif @@ -194,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, + int num_blacklist, struct amdgpu_ring 
**ring); +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; @@ -202,4 +212,44 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) } +static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) +{ + if (ring->count_dw <= 0) + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); + ring->ring[ring->wptr++ & ring->buf_mask] = v; + ring->wptr &= ring->ptr_mask; + ring->count_dw--; +} + +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, + void *src, int count_dw) +{ + unsigned occupied, chunk1, chunk2; + void *dst; + + if (unlikely(ring->count_dw < count_dw)) + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); + + occupied = ring->wptr & ring->buf_mask; + dst = (void *)&ring->ring[occupied]; + chunk1 = ring->buf_mask + 1 - occupied; + chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; + chunk2 = count_dw - chunk1; + chunk1 <<= 2; + chunk2 <<= 2; + + if (chunk1) + memcpy(dst, src, chunk1); + + if (chunk2) { + src += chunk1; + dst = (void *)ring->ring; + memcpy(dst, src, chunk2); + } + + ring->wptr += count_dw; + ring->wptr &= ring->ptr_mask; + ring->count_dw -= count_dw; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 5ca75a456ad2..3144400435b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -64,7 +64,7 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&sa_manager->flist[i]); r = amdgpu_bo_create(adev, size, align, true, domain, - 0, NULL, NULL, &sa_manager->bo); + 0, NULL, NULL, 0, &sa_manager->bo); if (r) { dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index ed814e6d0207..c586f44312f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct dma_fence *f = e->fence; struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + if (dma_fence_is_signaled(f)) { + hash_del(&e->node); + dma_fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } if (ring && s_fence) { /* For fences from the same ring it is sufficient * when they are scheduled. 
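/*
 * Aside: the amdgpu_ring_write_multiple() helper added to amdgpu_ring.h
 * above copies count_dw dwords into the ring buffer with at most two
 * memcpy() calls, splitting at the point where the write pointer wraps.
 * The same chunking arithmetic in isolation (illustrative sketch; the
 * example values in the comment are invented):
 *   buf_mask = 0xff (256-dword ring), wptr = 250, count_dw = 10
 *   -> chunk1 = 6 dwords written at offset 250, chunk2 = 4 dwords at 0.
 */
static void example_ring_copy_wrapped(u32 *ring_buf, u32 buf_mask, u32 wptr,
				      const u32 *src, u32 count_dw)
{
	u32 occupied = wptr & buf_mask;
	u32 chunk1 = buf_mask + 1 - occupied;	/* dwords until the wrap */
	u32 chunk2;

	if (chunk1 > count_dw)
		chunk1 = count_dw;
	chunk2 = count_dw - chunk1;

	memcpy(&ring_buf[occupied], src, chunk1 * sizeof(u32));
	if (chunk2)
		memcpy(ring_buf, src + chunk1, chunk2 * sizeof(u32));
}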
@@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, } } - if (dma_fence_is_signaled(f)) { - hash_del(&e->node); - dma_fence_put(f); - kmem_cache_free(amdgpu_sync_slab, e); - continue; - } - return f; } @@ -298,6 +297,25 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) return NULL; } +int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + int i, r; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + r = dma_fence_wait(e->fence, intr); + if (r) + return r; + + hash_del(&e->node); + dma_fence_put(e->fence); + kmem_cache_free(amdgpu_sync_slab, e); + } + + return 0; +} + /** * amdgpu_sync_free - free the sync object * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 605be266e07f..dc7687993317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); +int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); void amdgpu_sync_fini(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 15510dadde01..ed8c3739015b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -33,7 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_bo *vram_obj = NULL; struct amdgpu_bo **gtt_obj = NULL; - uint64_t gtt_addr, vram_addr; + uint64_t gart_addr, vram_addr; unsigned n, size; int i, r; @@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024; + n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) if (adev->rings[i]) n -= adev->rings[i]->ring_size; @@ -61,7 +61,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0, - NULL, NULL, &vram_obj); + NULL, NULL, 0, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -76,13 +76,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } for (i = 0; i < n; i++) { void *gtt_map, *vram_map; - void **gtt_start, **gtt_end; + void **gart_start, **gart_end; void **vram_start, **vram_end; struct dma_fence *fence = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, gtt_obj + i); + NULL, 0, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; @@ -91,7 +91,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_reserve(gtt_obj[i], false); if (unlikely(r != 0)) goto out_lclean_unref; - r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, >t_addr); + r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr); if (r) { DRM_ERROR("Failed to pin GTT object %d\n", i); goto out_lclean_unres; @@ -103,15 +103,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - for (gtt_start = gtt_map, gtt_end = 
gtt_map + size; - gtt_start < gtt_end; - gtt_start++) - *gtt_start = gtt_start; + for (gart_start = gtt_map, gart_end = gtt_map + size; + gart_start < gart_end; + gart_start++) + *gart_start = gart_start; amdgpu_bo_kunmap(gtt_obj[i]); - r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr, - size, NULL, &fence, false); + r = amdgpu_copy_buffer(ring, gart_addr, vram_addr, + size, NULL, &fence, false, false); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); @@ -132,21 +132,21 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - for (gtt_start = gtt_map, gtt_end = gtt_map + size, + for (gart_start = gtt_map, gart_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; vram_start < vram_end; - gtt_start++, vram_start++) { - if (*vram_start != gtt_start) { + gart_start++, vram_start++) { + if (*vram_start != gart_start) { DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " "expected 0x%p (GTT/VRAM offset " "0x%16llx/0x%16llx)\n", - i, *vram_start, gtt_start, + i, *vram_start, gart_start, (unsigned long long) - (gtt_addr - adev->mc.gtt_start + - (void*)gtt_start - gtt_map), + (gart_addr - adev->mc.gart_start + + (void*)gart_start - gtt_map), (unsigned long long) (vram_addr - adev->mc.vram_start + - (void*)gtt_start - gtt_map)); + (void*)gart_start - gtt_map)); amdgpu_bo_kunmap(vram_obj); goto out_lclean_unpin; } @@ -155,8 +155,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(vram_obj); - r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr, - size, NULL, &fence, false); + r = amdgpu_copy_buffer(ring, vram_addr, gart_addr, + size, NULL, &fence, false, false); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); @@ -177,20 +177,20 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - for (gtt_start = gtt_map, gtt_end = gtt_map + size, + for (gart_start = gtt_map, gart_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; - gtt_start < gtt_end; - gtt_start++, vram_start++) { - if (*gtt_start != vram_start) { + gart_start < gart_end; + gart_start++, vram_start++) { + if (*gart_start != vram_start) { DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " "expected 0x%p (VRAM/GTT offset " "0x%16llx/0x%16llx)\n", - i, *gtt_start, vram_start, + i, *gart_start, vram_start, (unsigned long long) (vram_addr - adev->mc.vram_start + (void*)vram_start - vram_map), (unsigned long long) - (gtt_addr - adev->mc.gtt_start + + (gart_addr - adev->mc.gart_start + (void*)vram_start - vram_map)); amdgpu_bo_kunmap(gtt_obj[i]); goto out_lclean_unpin; @@ -200,7 +200,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(gtt_obj[i]); DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", - gtt_addr - adev->mc.gtt_start); + gart_addr - adev->mc.gart_start); continue; out_lclean_unpin: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 8601904e670a..1c88bd5e29ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,62 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, + TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(adev, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + 
__field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + +TRACE_EVENT(amdgpu_ttm_tt_unpopulate, + TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(adev, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), @@ -105,12 +161,12 @@ TRACE_EVENT(amdgpu_bo_create, __entry->bo = bo; __entry->pages = bo->tbo.num_pages; __entry->type = bo->tbo.mem.mem_type; - __entry->prefer = bo->prefered_domains; + __entry->prefer = bo->preferred_domains; __entry->allow = bo->allowed_domains; __entry->visible = bo->flags; ), - TP_printk("bo=%p, pages=%u, type=%d, prefered=%d, allowed=%d, visible=%d", + TP_printk("bo=%p, pages=%u, type=%d, preferred=%d, allowed=%d, visible=%d", __entry->bo, __entry->pages, __entry->type, __entry->prefer, __entry->allow, __entry->visible) ); @@ -224,17 +280,17 @@ TRACE_EVENT(amdgpu_vm_bo_map, __field(long, start) __field(long, last) __field(u64, offset) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( - __entry->bo = bo_va ? bo_va->bo : NULL; + __entry->bo = bo_va ? 
bo_va->base.bo : NULL; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx", __entry->bo, __entry->start, __entry->last, __entry->offset, __entry->flags) ); @@ -248,17 +304,17 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, __field(long, start) __field(long, last) __field(u64, offset) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( - __entry->bo = bo_va->bo; + __entry->bo = bo_va->base.bo; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx", __entry->bo, __entry->start, __entry->last, __entry->offset, __entry->flags) ); @@ -269,7 +325,7 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping, TP_STRUCT__entry( __field(u64, soffset) __field(u64, eoffset) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( @@ -277,7 +333,7 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping, __entry->eoffset = mapping->last + 1; __entry->flags = mapping->flags; ), - TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", + TP_printk("soffs=%010llx, eoffs=%010llx, flags=%llx", __entry->soffset, __entry->eoffset, __entry->flags) ); @@ -293,14 +349,14 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags), + uint32_t incr, uint64_t flags), TP_ARGS(pe, addr, count, incr, flags), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( @@ -310,7 +366,7 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->incr = incr; __entry->flags = flags; ), - TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, count=%u", + TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u", __entry->pe, __entry->addr, __entry->incr, __entry->flags, __entry->count) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5db0230e45c6..bc746131987f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -29,11 +29,11 @@ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> * Dave Airlie */ -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_page_alloc.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include <linux/seq_file.h> @@ -43,14 +43,20 @@ #include <linux/pagemap.h> #include <linux/debugfs.h> #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) +static int amdgpu_map_buffer(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem, unsigned num_pages, + uint64_t offset, unsigned window, + struct amdgpu_ring *ring, + uint64_t *addr); + static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); - /* * Global memory. 
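/*
 * Aside: amdgpu_map_buffer(), declared above and implemented later in this
 * file, backs the new copy path for buffers whose GTT space has not been
 * allocated yet.  Rather than binding the whole BO, the pages needed for
 * one transfer are mapped into a small per-direction window at the start
 * of the GART and the SDMA engine copies through that window.  The window
 * address arithmetic in isolation (illustrative sketch; only the constants
 * and mc.gart_start come from this patch):
 */
static u64 example_gtt_window_addr(struct amdgpu_device *adev, unsigned window)
{
	/* window 0 is used for the source, window 1 for the destination */
	return adev->mc.gart_start +
	       (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE;
}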
*/ @@ -97,6 +103,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) goto error_bo; } + mutex_init(&adev->mman.gtt_window_lock); + ring = adev->mman.buffer_funcs_ring; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, @@ -123,6 +131,7 @@ static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) if (adev->mman.mem_global_referenced) { amd_sched_entity_fini(adev->mman.entity.sched, &adev->mman.entity); + mutex_destroy(&adev->mman.gtt_window_lock); drm_global_item_unref(&adev->mman.bo_global_ref.ref); drm_global_item_unref(&adev->mman.mem_global_ref); adev->mman.mem_global_referenced = false; @@ -150,7 +159,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_TT: man->func = &amdgpu_gtt_mgr_func; - man->gpu_offset = adev->mc.gtt_start; + man->gpu_offset = adev->mc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; @@ -186,12 +195,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - static struct ttm_place placements = { + static const struct ttm_place placements = { .fpfn = 0, .lpfn = 0, .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM }; - unsigned i; if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; @@ -207,22 +215,36 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, adev->mman.buffer_funcs_ring && adev->mman.buffer_funcs_ring->ready == false) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); + } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { + unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; + struct drm_mm_node *node = bo->mem.mm_node; + unsigned long pages_left; + + for (pages_left = bo->mem.num_pages; + pages_left; + pages_left -= node->size, node++) { + if (node->start < fpfn) + break; + } + + if (!pages_left) + goto gtt; + + /* Try evicting to the CPU inaccessible part of VRAM + * first, but only set GTT as busy placement, so this + * BO will be evicted to GTT rather than causing other + * BOs to be evicted from VRAM + */ + amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT); + abo->placements[0].fpfn = fpfn; + abo->placements[0].lpfn = 0; + abo->placement.busy_placement = &abo->placements[1]; + abo->placement.num_busy_placement = 1; } else { +gtt: amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); - for (i = 0; i < abo->placement.num_placement; ++i) { - if (!(abo->placements[i].flags & - TTM_PL_FLAG_TT)) - continue; - - if (abo->placements[i].lpfn) - continue; - - /* set an upper limit to force directly - * allocating address space for the BO. 
- */ - abo->placements[i].lpfn = - adev->mc.gtt_size >> PAGE_SHIFT; - } } break; case TTM_PL_TT: @@ -252,29 +274,18 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo, new_mem->mm_node = NULL; } -static int amdgpu_mm_node_addr(struct ttm_buffer_object *bo, - struct drm_mm_node *mm_node, - struct ttm_mem_reg *mem, - uint64_t *addr) +static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, + struct drm_mm_node *mm_node, + struct ttm_mem_reg *mem) { - int r; - - switch (mem->mem_type) { - case TTM_PL_TT: - r = amdgpu_ttm_bind(bo, mem); - if (r) - return r; + uint64_t addr = 0; - case TTM_PL_VRAM: - *addr = mm_node->start << PAGE_SHIFT; - *addr += bo->bdev->man[mem->mem_type].gpu_offset; - break; - default: - DRM_ERROR("Unknown placement %d\n", mem->mem_type); - return -EINVAL; + if (mem->mem_type != TTM_PL_TT || + amdgpu_gtt_mgr_is_allocated(mem)) { + addr = mm_node->start << PAGE_SHIFT; + addr += bo->bdev->man[mem->mem_type].gpu_offset; } - - return 0; + return addr; } static int amdgpu_move_blit(struct ttm_buffer_object *bo, @@ -299,26 +310,40 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, } old_mm = old_mem->mm_node; - r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start); - if (r) - return r; old_size = old_mm->size; - + old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); new_mm = new_mem->mm_node; - r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start); - if (r) - return r; new_size = new_mm->size; + new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); num_pages = new_mem->num_pages; + mutex_lock(&adev->mman.gtt_window_lock); while (num_pages) { - unsigned long cur_pages = min(old_size, new_size); + unsigned long cur_pages = min(min(old_size, new_size), + (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); + uint64_t from = old_start, to = new_start; struct dma_fence *next; - r = amdgpu_copy_buffer(ring, old_start, new_start, + if (old_mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(old_mem)) { + r = amdgpu_map_buffer(bo, old_mem, cur_pages, + old_start, 0, ring, &from); + if (r) + goto error; + } + + if (new_mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(new_mem)) { + r = amdgpu_map_buffer(bo, new_mem, cur_pages, + new_start, 1, ring, &to); + if (r) + goto error; + } + + r = amdgpu_copy_buffer(ring, from, to, cur_pages * PAGE_SIZE, - bo->resv, &next, false); + bo->resv, &next, false, true); if (r) goto error; @@ -331,10 +356,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, old_size -= cur_pages; if (!old_size) { - r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem, - &old_start); - if (r) - goto error; + old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); old_size = old_mm->size; } else { old_start += cur_pages * PAGE_SIZE; @@ -342,22 +364,21 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, new_size -= cur_pages; if (!new_size) { - r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem, - &new_start); - if (r) - goto error; - + new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); new_size = new_mm->size; } else { new_start += cur_pages * PAGE_SIZE; } } + mutex_unlock(&adev->mman.gtt_window_lock); r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); dma_fence_put(fence); return r; error: + mutex_unlock(&adev->mman.gtt_window_lock); + if (fence) dma_fence_wait(fence, false); dma_fence_put(fence); @@ -384,7 +405,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, placement.num_busy_placement = 1; placement.busy_placement = &placements; placements.fpfn = 0; - placements.lpfn = adev->mc.gtt_size >> 
PAGE_SHIFT; + placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); @@ -431,7 +452,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, placement.num_busy_placement = 1; placement.busy_placement = &placements; placements.fpfn = 0; - placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT; + placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); @@ -507,6 +528,15 @@ memcpy: } } + if (bo->type == ttm_bo_type_device && + new_mem->mem_type == TTM_PL_VRAM && + old_mem->mem_type != TTM_PL_VRAM) { + /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU + * accesses the BO after it's moved. + */ + abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } + /* update statistics */ atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved); return 0; @@ -633,6 +663,38 @@ release_pages: return r; } +static void amdgpu_trace_dma_map(struct ttm_tt *ttm) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned i; + + if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) { + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } + } +} + +static void amdgpu_trace_dma_unmap(struct ttm_tt *ttm) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned i; + + if (unlikely(trace_amdgpu_ttm_tt_unpopulate_enabled())) { + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_unpopulate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } + } +} + /* prepare the sg table with the user pages */ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { @@ -659,6 +721,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); + amdgpu_trace_dma_map(ttm); + return 0; release_sg: @@ -692,6 +756,8 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) put_page(page); } + amdgpu_trace_dma_unmap(ttm); + sg_free_table(ttm->sg); } @@ -699,7 +765,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { struct amdgpu_ttm_tt *gtt = (void*)ttm; - int r; + uint64_t flags; + int r = 0; if (gtt->userptr) { r = amdgpu_ttm_tt_pin_userptr(ttm); @@ -718,7 +785,25 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; - return 0; + if (!amdgpu_gtt_mgr_is_allocated(bo_mem)) + return 0; + + spin_lock(>t->adev->gtt_list_lock); + flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); + gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; + r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, + ttm->pages, gtt->ttm.dma_address, flags); + + if (r) { + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + ttm->num_pages, gtt->offset); + goto error_gart_bind; + } + + list_add_tail(>t->list, >t->adev->gtt_list); +error_gart_bind: + spin_unlock(>t->adev->gtt_list_lock); + return r; } bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) @@ -730,35 +815,39 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_tt *ttm = bo->ttm; - struct amdgpu_ttm_tt *gtt = (void 
*)bo->ttm; - uint64_t flags; + struct ttm_mem_reg tmp; + + struct ttm_placement placement; + struct ttm_place placements; int r; if (!ttm || amdgpu_ttm_is_bound(ttm)) return 0; - r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo, - NULL, bo_mem); - if (r) { - DRM_ERROR("Failed to allocate GTT address space (%d)\n", r); + tmp = bo->mem; + tmp.mm_node = NULL; + placement.num_placement = 1; + placement.placement = &placements; + placement.num_busy_placement = 1; + placement.busy_placement = &placements; + placements.fpfn = 0; + placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; + placements.flags = bo->mem.placement | TTM_PL_FLAG_TT; + + r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); + if (unlikely(r)) return r; - } - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); - gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; - r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, - ttm->pages, gtt->ttm.dma_address, flags); + r = ttm_bo_move_ttm(bo, true, false, &tmp); + if (unlikely(r)) + ttm_bo_mem_put(bo, &tmp); + else + bo->offset = (bo->mem.start << PAGE_SHIFT) + + bo->bdev->man[bo->mem.mem_type].gpu_offset; - if (r) { - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - ttm->num_pages, gtt->offset); - return r; - } - spin_lock(>t->adev->gtt_list_lock); - list_add_tail(>t->list, >t->adev->gtt_list); - spin_unlock(>t->adev->gtt_list_lock); - return 0; + return r; } int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) @@ -789,6 +878,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); @@ -797,14 +887,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - if (gtt->adev->gart.ready) - amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); - spin_lock(>t->adev->gtt_list_lock); + r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); + if (r) { + DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", + gtt->ttm.ttm.num_pages, gtt->offset); + goto error_unbind; + } list_del_init(>t->list); +error_unbind: spin_unlock(>t->adev->gtt_list_lock); - - return 0; + return r; } static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) @@ -846,7 +939,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) { - struct amdgpu_device *adev; + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; int r; @@ -869,14 +962,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; - return 0; + r = 0; + goto trace_mappings; } - adev = amdgpu_ttm_adev(ttm->bdev); - #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { - return ttm_dma_populate(>t->ttm, adev->dev); + r = ttm_dma_populate(>t->ttm, adev->dev); + goto trace_mappings; } #endif @@ -899,7 +992,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) return -EFAULT; } } - return 0; + + r = 0; +trace_mappings: + if (likely(!r)) + amdgpu_trace_dma_map(ttm); + return r; } static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) @@ -920,6 +1018,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) adev = amdgpu_ttm_adev(ttm->bdev); + amdgpu_trace_dma_unmap(ttm); + #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { 
ttm_dma_unpopulate(>t->ttm, adev->dev); @@ -1069,6 +1169,67 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return ttm_bo_eviction_valuable(bo, place); } +static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, + unsigned long offset, + void *buf, int len, int write) +{ + struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + struct drm_mm_node *nodes = abo->tbo.mem.mm_node; + uint32_t value = 0; + int ret = 0; + uint64_t pos; + unsigned long flags; + + if (bo->mem.mem_type != TTM_PL_VRAM) + return -EIO; + + while (offset >= (nodes->size << PAGE_SHIFT)) { + offset -= nodes->size << PAGE_SHIFT; + ++nodes; + } + pos = (nodes->start << PAGE_SHIFT) + offset; + + while (len && pos < adev->mc.mc_vram_size) { + uint64_t aligned_pos = pos & ~(uint64_t)3; + uint32_t bytes = 4 - (pos & 3); + uint32_t shift = (pos & 3) * 8; + uint32_t mask = 0xffffffff << shift; + + if (len < bytes) { + mask &= 0xffffffff >> (bytes - len) * 8; + bytes = len; + } + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); + WREG32(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0xffffffff) + value = RREG32(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32(mmMM_DATA, value); + } + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, &value, bytes); + } + + ret += bytes; + buf = (uint8_t *)buf + bytes; + pos += bytes; + len -= bytes; + if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) { + ++nodes; + pos = (nodes->start << PAGE_SHIFT); + } + } + + return ret; +} + static struct ttm_bo_driver amdgpu_bo_driver = { .ttm_tt_create = &amdgpu_ttm_tt_create, .ttm_tt_populate = &amdgpu_ttm_tt_populate, @@ -1084,11 +1245,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, + .access_memory = &amdgpu_ttm_access_memory }; int amdgpu_ttm_init(struct amdgpu_device *adev) { + uint64_t gtt_size; int r; + u64 vis_vram_limit; r = amdgpu_ttm_global_init(adev); if (r) { @@ -1112,36 +1276,37 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("Failed initializing VRAM heap.\n"); return r; } + + /* Reduce size of CPU-visible VRAM if requested */ + vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024; + if (amdgpu_vis_vram_limit > 0 && + vis_vram_limit <= adev->mc.visible_vram_size) + adev->mc.visible_vram_size = vis_vram_limit; + /* Change the size here instead of the init above so only lpfn is affected */ amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); - r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->stollen_vga_memory); - if (r) { - return r; - } - r = amdgpu_bo_reserve(adev->stollen_vga_memory, false); + r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->stolen_vga_memory, + NULL, NULL); if (r) return r; - r = amdgpu_bo_pin(adev->stollen_vga_memory, AMDGPU_GEM_DOMAIN_VRAM, NULL); - amdgpu_bo_unreserve(adev->stollen_vga_memory); - if (r) { - amdgpu_bo_unref(&adev->stollen_vga_memory); - return r; - } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->mc.real_vram_size / (1024 * 1024))); - r = 
ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, - adev->mc.gtt_size >> PAGE_SHIFT); + + if (amdgpu_gtt_size == -1) + gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); + else + gtt_size = (uint64_t)amdgpu_gtt_size << 20; + r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing GTT heap.\n"); return r; } DRM_INFO("amdgpu: %uM of GTT memory ready.\n", - (unsigned)(adev->mc.gtt_size / (1024 * 1024))); + (unsigned)(gtt_size / (1024 * 1024))); adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; @@ -1197,13 +1362,13 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (!adev->mman.initialized) return; amdgpu_ttm_debugfs_fini(adev); - if (adev->stollen_vga_memory) { - r = amdgpu_bo_reserve(adev->stollen_vga_memory, true); + if (adev->stolen_vga_memory) { + r = amdgpu_bo_reserve(adev->stolen_vga_memory, true); if (r == 0) { - amdgpu_bo_unpin(adev->stollen_vga_memory); - amdgpu_bo_unreserve(adev->stollen_vga_memory); + amdgpu_bo_unpin(adev->stolen_vga_memory); + amdgpu_bo_unreserve(adev->stolen_vga_memory); } - amdgpu_bo_unref(&adev->stollen_vga_memory); + amdgpu_bo_unref(&adev->stolen_vga_memory); } ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); @@ -1250,12 +1415,77 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) return ttm_bo_mmap(filp, vma, &adev->mman.bdev); } -int amdgpu_copy_buffer(struct amdgpu_ring *ring, - uint64_t src_offset, - uint64_t dst_offset, - uint32_t byte_count, +static int amdgpu_map_buffer(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem, unsigned num_pages, + uint64_t offset, unsigned window, + struct amdgpu_ring *ring, + uint64_t *addr) +{ + struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; + struct amdgpu_device *adev = ring->adev; + struct ttm_tt *ttm = bo->ttm; + struct amdgpu_job *job; + unsigned num_dw, num_bytes; + dma_addr_t *dma_address; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + uint64_t flags; + int r; + + BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < + AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + + *addr = adev->mc.gart_start; + *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE; + + num_dw = adev->mman.buffer_funcs->copy_num_dw; + while (num_dw & 0x7) + num_dw++; + + num_bytes = num_pages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = adev->gart.table_addr; + dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + dma_address = >t->ttm.dma_address[offset >> PAGE_SHIFT]; + flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, + &job->ibs[0].ptr[num_dw]); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, ring, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + +int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, + uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct dma_fence **fence, bool direct_submit) + struct dma_fence **fence, bool 
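/*
 * Illustrative sketch (not part of the patch): how amdgpu_ttm_init() above now
 * picks the GTT domain size -- either the amdgpu_gtt_size override (in MiB) or,
 * by default, the built-in AMDGPU_DEFAULT_GTT_SIZE_MB but never smaller than
 * VRAM.  Plain C, hypothetical names.
 */
#include <stdint.h>

static uint64_t sketch_gtt_bytes(int gtt_size_mb,       /* amdgpu_gtt_size, -1 = auto */
                                 uint64_t default_mb,   /* AMDGPU_DEFAULT_GTT_SIZE_MB */
                                 uint64_t vram_bytes)   /* adev->mc.mc_vram_size      */
{
        uint64_t gtt;

        if (gtt_size_mb == -1) {
                gtt = default_mb << 20;
                if (gtt < vram_bytes)   /* default is at least as large as VRAM */
                        gtt = vram_bytes;
        } else {
                gtt = (uint64_t)gtt_size_mb << 20;
        }
        return gtt;
}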
direct_submit, + bool vm_needs_flush) { struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -1277,6 +1507,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, if (r) return r; + job->vm_needs_flush = vm_needs_flush; if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED); @@ -1321,11 +1552,12 @@ error_free: } int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, + uint64_t src_data, struct reservation_object *resv, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/ uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -1341,6 +1573,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, return -EINVAL; } + if (bo->tbo.mem.mem_type == TTM_PL_TT) { + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + if (r) + return r; + } + num_pages = bo->tbo.num_pages; mm_node = bo->tbo.mem.mm_node; num_loops = 0; @@ -1351,7 +1589,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, num_pages -= mm_node->size; ++mm_node; } - num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; + + /* 10 double words for each SDMA_OP_PTEPDE cmd */ + num_dw = num_loops * 10; /* for IB padding */ num_dw += 64; @@ -1376,16 +1616,16 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; - r = amdgpu_mm_node_addr(&bo->tbo, mm_node, - &bo->tbo.mem, &dst_addr); - if (r) - return r; + WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8"); + dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, - dst_addr, cur_size_in_bytes); + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], + dst_addr, 0, + cur_size_in_bytes >> 3, 0, + src_data); dst_addr += cur_size_in_bytes; byte_count -= cur_size_in_bytes; @@ -1411,32 +1651,16 @@ error_free: #if defined(CONFIG_DEBUG_FS) -extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager - *man); static int amdgpu_mm_dump_table(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; unsigned ttm_pl = *(int *)node->info_ent->data; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv; - struct ttm_bo_global *glob = adev->mman.bdev.glob; + struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl]; struct drm_printer p = drm_seq_file_printer(m); - spin_lock(&glob->lru_lock); - drm_mm_print(mm, &p); - spin_unlock(&glob->lru_lock); - switch (ttm_pl) { - case TTM_PL_VRAM: - seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", - adev->mman.bdev.man[ttm_pl].size, - (u64)atomic64_read(&adev->vram_usage) >> 20, - (u64)atomic64_read(&adev->vram_vis_usage) >> 20); - break; - case TTM_PL_TT: - amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]); - break; - } + man->func->debug(man, &p); return 0; } @@ -1462,6 +1686,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (*pos >= adev->mc.mc_vram_size) + return -ENXIO; + while (size) { unsigned long flags; uint32_t value; @@ -1565,7 +1792,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) adev, &amdgpu_ttm_gtt_fops); if (IS_ERR(ent)) return 
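/*
 * Illustrative sketch (not part of the patch): the IB sizing used once
 * amdgpu_fill_buffer() switches to SDMA_OP_PTEPDE above.  Each command covers
 * at most max_bytes of one drm_mm node, costs 10 dwords, writes byte_count / 8
 * eight-byte entries of src_data, and the IB keeps 64 dwords of padding
 * headroom.  The per-node chunking is a hedged reconstruction of the elided
 * loop body; plain C, hypothetical names.
 */
#include <stdint.h>

static uint32_t sketch_fill_num_dw(const uint64_t *node_bytes, /* size of each drm_mm node */
                                   unsigned num_nodes,
                                   uint32_t max_bytes)         /* fill_max_bytes           */
{
        uint32_t num_loops = 0;
        unsigned i;

        for (i = 0; i < num_nodes; ++i)
                /* one SDMA_OP_PTEPDE command per max_bytes chunk of each node */
                num_loops += (node_bytes[i] + max_bytes - 1) / max_bytes;

        return num_loops * 10 + 64;     /* 10 dw per command + IB padding */
}

/* each command then writes (chunk_bytes >> 3) 64-bit entries, hence the multiple-of-8 warning */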
PTR_ERR(ent); - i_size_write(ent->d_inode, adev->mc.gtt_size); + i_size_write(ent->d_inode, adev->mc.gart_size); adev->mman.gtt = ent; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6bdede8ff12b..43093bffa2cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -34,6 +34,9 @@ #define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1) #define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2) +#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 +#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 + struct amdgpu_mman { struct ttm_bo_global_ref bo_global_ref; struct drm_global_reference mem_global_ref; @@ -49,6 +52,8 @@ struct amdgpu_mman { /* buffer handling */ const struct amdgpu_buffer_funcs *buffer_funcs; struct amdgpu_ring *buffer_funcs_ring; + + struct mutex gtt_window_lock; /* Scheduler entity for buffer moves */ struct amd_sched_entity entity; }; @@ -56,24 +61,25 @@ struct amdgpu_mman { extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; -int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, - struct ttm_buffer_object *tbo, - const struct ttm_place *place, - struct ttm_mem_reg *mem); +bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem); +uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); + +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); -int amdgpu_copy_buffer(struct amdgpu_ring *ring, - uint64_t src_offset, - uint64_t dst_offset, - uint32_t byte_count, +int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, + uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct dma_fence **fence, bool direct_submit); + struct dma_fence **fence, bool direct_submit, + bool vm_needs_flush); int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, + uint64_t src_data, struct reservation_object *resv, struct dma_fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); +int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index dfd1c98efa7c..36c763310df5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -197,6 +197,27 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr) } } +void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr) +{ + uint16_t version_major = le16_to_cpu(hdr->header_version_major); + uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); + + DRM_DEBUG("GPU_INFO\n"); + amdgpu_ucode_print_common_hdr(hdr); + + if (version_major == 1) { + const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr = + container_of(hdr, struct gpu_info_firmware_header_v1_0, header); + + DRM_DEBUG("version_major: %u\n", + le16_to_cpu(gpu_info_hdr->version_major)); + DRM_DEBUG("version_minor: %u\n", + le16_to_cpu(gpu_info_hdr->version_minor)); + } else { + DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor); + } +} + int amdgpu_ucode_validate(const struct firmware *fw) { const struct common_firmware_header *hdr = @@ -253,6 +274,11 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int 
load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_RAVEN: + if (load_type != 2) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_PSP; default: DRM_ERROR("Unknow firmware load type\n"); } @@ -332,8 +358,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, (le32_to_cpu(header->jt_offset) * 4); memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4); - ucode->ucode_size += le32_to_cpu(header->jt_size) * 4; - return 0; } @@ -347,9 +371,15 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) struct amdgpu_firmware_info *ucode = NULL; const struct common_firmware_header *header = NULL; + if (!adev->firmware.fw_size) { + dev_warn(adev->dev, "No ip firmware need to load\n"); + return 0; + } + err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, - 0, NULL, NULL, bo); + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, 0, bo); if (err) { dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); goto failed; @@ -428,6 +458,9 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev) int i; struct amdgpu_firmware_info *ucode = NULL; + if (!adev->firmware.fw_size) + return 0; + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 758f03a1770d..30b5500dc152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -113,6 +113,32 @@ struct sdma_firmware_header_v1_1 { uint32_t digest_size; }; +/* gpu info payload */ +struct gpu_info_firmware_v1_0 { + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; +}; + +/* version_major=1, version_minor=0 */ +struct gpu_info_firmware_header_v1_0 { + struct common_firmware_header header; + uint16_t version_major; /* version */ + uint16_t version_minor; /* version */ +}; + /* header is fixed size */ union amdgpu_firmware_header { struct common_firmware_header common; @@ -124,6 +150,7 @@ union amdgpu_firmware_header { struct rlc_firmware_header_v2_0 rlc_v2_0; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; + struct gpu_info_firmware_header_v1_0 gpu_info; uint8_t raw[0x100]; }; @@ -184,6 +211,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); +void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); int amdgpu_ucode_validate(const struct firmware *fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 2ca09f111f08..e19928dae8e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -588,6 +588,10 @@ static 
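/*
 * Illustrative sketch (not part of the patch): how a consumer of the new
 * gpu_info firmware image can walk from the common header to the versioned
 * header and its payload.  It assumes the payload starts at
 * ucode_array_offset_bytes, as the other firmware images handled by this file
 * do; kernel context (container_of, le16_to_cpu/le32_to_cpu) and a
 * hypothetical function name.
 */
static const struct gpu_info_firmware_v1_0 *
sketch_gpu_info_payload(const uint8_t *fw_data)
{
        const struct common_firmware_header *hdr =
                (const struct common_firmware_header *)fw_data;
        const struct gpu_info_firmware_header_v1_0 *v1;

        if (le16_to_cpu(hdr->header_version_major) != 1)
                return NULL;            /* only v1 headers are understood here */

        /* the versioned header wraps the common one, as in the printer above */
        v1 = container_of(hdr, struct gpu_info_firmware_header_v1_0, header);
        (void)v1;       /* v1->version_major / v1->version_minor describe the payload */

        /* payload (gc_num_se, gc_num_cu_per_sh, ...) is assumed to follow at the ucode offset */
        return (const struct gpu_info_firmware_v1_0 *)
                (fw_data + le32_to_cpu(hdr->ucode_array_offset_bytes));
}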
int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, } break; + case 8: /* MJPEG */ + min_dpb_size = 0; + break; + case 16: /* H265 */ image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; image_size = ALIGN(image_size, 256); @@ -1051,7 +1055,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; @@ -1101,7 +1105,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 735c38d7db0d..c855366521ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -165,35 +165,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | (binary_id << 8)); - /* allocate firmware, stack and heap BO */ - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->vce.vcpu_bo); + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo, + &adev->vce.gpu_addr, &adev->vce.cpu_addr); if (r) { dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); return r; } - r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); - if (r) { - amdgpu_bo_unref(&adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); - return r; - } - - r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM, - &adev->vce.gpu_addr); - amdgpu_bo_unreserve(adev->vce.vcpu_bo); - if (r) { - amdgpu_bo_unref(&adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) VCE bo pin failed\n", r); - return r; - } - - ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, @@ -230,7 +209,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); - amdgpu_bo_unref(&adev->vce.vcpu_bo); + amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, + (void **)&adev->vce.cpu_addr); for (i = 0; i < adev->vce.num_rings; i++) amdgpu_ring_fini(&adev->vce.ring[i]); @@ -957,9 +937,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r, timeout = adev->usec_timeout; - /* workaround VCE ring test slow issue for sriov*/ + /* skip ring test for sriov*/ if (amdgpu_sriov_vf(adev)) - timeout *= 10; + return 0; r = amdgpu_ring_alloc(ring, 16); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 0a7f18c461e4..5ce54cde472d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -33,6 +33,8 @@ struct amdgpu_vce { struct amdgpu_bo *vcpu_bo; uint64_t gpu_addr; + void *cpu_addr; + void *saved_bo; unsigned fw_version; unsigned fb_version; atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c new file mode 100644 index 000000000000..041e0121590c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -0,0 +1,652 @@ +/* + * Copyright 2016 
Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm.h> + +#include "amdgpu.h" +#include "amdgpu_pm.h" +#include "amdgpu_vcn.h" +#include "soc15d.h" +#include "soc15_common.h" + +#include "vega10/soc15ip.h" +#include "raven1/VCN/vcn_1_0_offset.h" + +/* 1 second timeout */ +#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) + +/* Firmware Names */ +#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" + +MODULE_FIRMWARE(FIRMWARE_RAVEN); + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work); + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; + unsigned long bo_size; + const char *fw_name; + const struct common_firmware_header *hdr; + unsigned version_major, version_minor, family_id; + int r; + + INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + + switch (adev->asic_type) { + case CHIP_RAVEN: + fw_name = FIRMWARE_RAVEN; + break; + default: + return -EINVAL; + } + + r = request_firmware(&adev->vcn.fw, fw_name, adev->dev); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n", + fw_name); + return r; + } + + r = amdgpu_ucode_validate(adev->vcn.fw); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n", + fw_name); + release_firmware(adev->vcn.fw); + adev->vcn.fw = NULL; + return r; + } + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + family_id = le32_to_cpu(hdr->ucode_version) & 0xff; + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + + + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + + AMDGPU_VCN_SESSION_SIZE * 40; + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } + + ring = &adev->vcn.ring_dec; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, + rq, amdgpu_sched_jobs); + if (r != 0) { 
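/*
 * Illustrative sketch (not part of the patch): the single 32-bit ucode_version
 * word decoded in amdgpu_vcn_sw_init() above packs family, major and minor
 * versions into fixed byte lanes.  Plain C, hypothetical names.
 */
#include <stdint.h>

struct sketch_vcn_version {
        uint8_t family_id;
        uint8_t major;
        uint8_t minor;
};

static struct sketch_vcn_version sketch_decode_vcn_version(uint32_t ucode_version)
{
        struct sketch_vcn_version v;

        v.family_id = ucode_version & 0xff;          /* bits  7:0  */
        v.major     = (ucode_version >> 24) & 0xff;  /* bits 31:24 */
        v.minor     = (ucode_version >> 8) & 0xff;   /* bits 15:8  */
        return v;
}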
+ DRM_ERROR("Failed setting up VCN dec run queue.\n"); + return r; + } + + ring = &adev->vcn.ring_enc[0]; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up VCN enc run queue.\n"); + return r; + } + + return 0; +} + +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) +{ + int i; + + kfree(adev->vcn.saved_bo); + + amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); + + amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); + + amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, + (void **)&adev->vcn.cpu_addr); + + amdgpu_ring_fini(&adev->vcn.ring_dec); + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + + release_firmware(adev->vcn.fw); + + return 0; +} + +int amdgpu_vcn_suspend(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return 0; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL); + if (!adev->vcn.saved_bo) + return -ENOMEM; + + memcpy_fromio(adev->vcn.saved_bo, ptr, size); + + return 0; +} + +int amdgpu_vcn_resume(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + if (adev->vcn.saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.saved_bo, size); + kfree(adev->vcn.saved_bo); + adev->vcn.saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + memset_io(ptr, 0, size); + } + + return 0; +} + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vcn.idle_work.work); + unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + + if (fences == 0) { + if (adev->pm.dpm_enabled) { + /* might be used when with pg/cg + amdgpu_dpm_enable_uvd(adev, false); + */ + } + } else { + schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + } +} + +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (set_clocks && adev->pm.dpm_enabled) { + /* might be used when with pg/cg + amdgpu_dpm_enable_uvd(adev, true); + */ + } +} + +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) +{ + schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); +} + +int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < 
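/*
 * Illustrative sketch (not part of the patch): the two resume paths of
 * amdgpu_vcn_resume() above, written over plain buffers.  Either the image
 * saved at suspend time is copied back verbatim, or the VCPU BO is re-seeded
 * from the firmware blob and the remainder cleared.  The kernel code uses the
 * _fromio/_toio variants because the BO lives in VRAM; hypothetical names
 * here.
 */
#include <stddef.h>
#include <string.h>

static void sketch_vcn_reseed(unsigned char *vcpu, size_t vcpu_size,
                              const unsigned char *saved,         /* NULL if nothing was saved */
                              const unsigned char *fw_ucode, size_t fw_ucode_size)
{
        if (saved) {
                memcpy(vcpu, saved, vcpu_size);         /* restore the saved image */
                return;
        }
        memcpy(vcpu, fw_ucode, fw_ucode_size);          /* reload the firmware ...  */
        memset(vcpu + fw_ucode_size, 0, vcpu_size - fw_ucode_size); /* ... clear the rest */
}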
adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, + bool direct, struct dma_fence **fence) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + struct amdgpu_device *adev = ring->adev; + uint64_t addr; + int i, r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto err; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto err; + + ib = &job->ibs[0]; + addr = amdgpu_bo_gpu_offset(bo); + ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0); + ib->ptr[1] = addr; + ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0); + ib->ptr[3] = addr >> 32; + ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0); + ib->ptr[5] = 0; + for (i = 6; i < 16; i += 2) { + ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + if (direct) { + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err_free; + + amdgpu_job_free(job); + } else { + r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err_free; + } + + ttm_eu_fence_buffer_objects(&ticket, &head, f); + + if (fence) + *fence = dma_fence_get(f); + amdgpu_bo_unref(&bo); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + +err: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + +static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, 0, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + msg[0] = cpu_to_le32(0x00000028); + msg[1] = cpu_to_le32(0x00000038); + msg[2] = cpu_to_le32(0x00000001); + msg[3] = cpu_to_le32(0x00000000); + msg[4] = cpu_to_le32(handle); + msg[5] = cpu_to_le32(0x00000000); + msg[6] = cpu_to_le32(0x00000001); + msg[7] = cpu_to_le32(0x00000028); + msg[8] = cpu_to_le32(0x00000010); + msg[9] = cpu_to_le32(0x00000000); + msg[10] = cpu_to_le32(0x00000007); + msg[11] = cpu_to_le32(0x00000000); + msg[12] = cpu_to_le32(0x00000780); + msg[13] = cpu_to_le32(0x00000440); + for (i = 14; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); +} + +static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence 
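/*
 * Illustrative sketch (not part of the patch): the decode "send message" IB
 * above is just register writes -- the message BO address split into low and
 * high halves for GPCOM_VCPU_DATA0/DATA1, one command word, then NO_OP packets
 * padding out to 16 dwords.  Plain C for the address split; hypothetical
 * names.  (The 0x780/0x440 values in the create message look like a 1920x1088
 * dummy session, but that is an inference, not something the patch states.)
 */
#include <stdint.h>

static void sketch_split_gpu_addr(uint64_t addr, uint32_t *lo, uint32_t *hi)
{
        *lo = (uint32_t)addr;           /* written via ..._GPCOM_VCPU_DATA0 */
        *hi = (uint32_t)(addr >> 32);   /* written via ..._GPCOM_VCPU_DATA1 */
}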
**fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, 0, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + msg[0] = cpu_to_le32(0x00000028); + msg[1] = cpu_to_le32(0x00000018); + msg[2] = cpu_to_le32(0x00000000); + msg[3] = cpu_to_le32(0x00000002); + msg[4] = cpu_to_le32(handle); + msg[5] = cpu_to_le32(0x00000000); + for (i = 6; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); +} + +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence; + long r; + + r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto error; + } + + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } + + dma_fence_put(fence); + +error: + return r; +} + +int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t rptr = amdgpu_ring_get_rptr(ring); + unsigned i; + int r; + + r = amdgpu_ring_alloc(ring, 16); + if (r) { + DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, VCN_ENC_CMD_END); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (amdgpu_ring_get_rptr(ring) != rptr) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed\n", + ring->idx); + r = -ETIMEDOUT; + } + + return r; +} + +static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = 0x0000000b; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = 
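/*
 * Illustrative sketch (not part of the patch): every IB test above (and the
 * KIQ register helpers in amdgpu_virt.c further down) folds the three-way
 * result of dma_fence_wait_timeout() the same way: 0 means the timeout
 * expired, a negative value is an error, and a positive value is the time
 * left, i.e. success.  Plain C, hypothetical name.
 */
#include <errno.h>      /* ETIMEDOUT; linux/errno.h in-kernel */

static long sketch_fold_fence_wait(long r)
{
        if (r == 0)
                return -ETIMEDOUT;      /* fence never signalled in time */
        if (r < 0)
                return r;               /* interrupted or other error    */
        return 0;                       /* signalled with time to spare  */
}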
dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = 0x0000000b; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence = NULL; + long r; + + r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto error; + } + + r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } +error: + dma_fence_put(fence); + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h new file mode 100644 index 000000000000..d50ba0657854 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -0,0 +1,77 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_VCN_H__ +#define __AMDGPU_VCN_H__ + +#define AMDGPU_VCN_STACK_SIZE (200*1024) +#define AMDGPU_VCN_HEAP_SIZE (256*1024) +#define AMDGPU_VCN_SESSION_SIZE (50*1024) +#define AMDGPU_VCN_FIRMWARE_OFFSET 256 +#define AMDGPU_VCN_MAX_ENC_RINGS 3 + +#define VCN_DEC_CMD_FENCE 0x00000000 +#define VCN_DEC_CMD_TRAP 0x00000001 +#define VCN_DEC_CMD_WRITE_REG 0x00000004 +#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006 +#define VCN_DEC_CMD_PACKET_START 0x0000000a +#define VCN_DEC_CMD_PACKET_END 0x0000000b + +#define VCN_ENC_CMD_NO_OP 0x00000000 +#define VCN_ENC_CMD_END 0x00000001 +#define VCN_ENC_CMD_IB 0x00000002 +#define VCN_ENC_CMD_FENCE 0x00000003 +#define VCN_ENC_CMD_TRAP 0x00000004 +#define VCN_ENC_CMD_REG_WRITE 0x0000000b +#define VCN_ENC_CMD_REG_WAIT 0x0000000c + +struct amdgpu_vcn { + struct amdgpu_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + unsigned fw_version; + void *saved_bo; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ + struct amdgpu_ring ring_dec; + struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; + struct amdgpu_irq_src irq; + struct amd_sched_entity entity_dec; + struct amd_sched_entity entity_enc; + unsigned num_enc_rings; +}; + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev); +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); +int amdgpu_vcn_suspend(struct amdgpu_device *adev); +int amdgpu_vcn_resume(struct amdgpu_device *adev); +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); + +int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c new file mode 100644 index 000000000000..45ac91861965 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c @@ -0,0 +1,85 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
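/*
 * Illustrative sketch (not part of the patch): the VCPU BO sized in
 * amdgpu_vcn_sw_init() earlier is the page-aligned firmware image plus the
 * fixed stack and heap regions defined just above and room for 40 sessions.
 * Plain C; assumes a 4 KiB GPU page for the alignment that
 * AMDGPU_GPU_PAGE_ALIGN performs.
 */
#include <stdint.h>

#define SKETCH_GPU_PAGE 4096u

static uint64_t sketch_vcn_bo_size(uint32_t fw_ucode_size_bytes)
{
        uint64_t fw = ((uint64_t)fw_ucode_size_bytes + 8 + SKETCH_GPU_PAGE - 1)
                      & ~(uint64_t)(SKETCH_GPU_PAGE - 1);

        return fw
             + 200 * 1024               /* AMDGPU_VCN_STACK_SIZE   */
             + 256 * 1024               /* AMDGPU_VCN_HEAP_SIZE    */
             + 50 * 1024 * 40;          /* AMDGPU_VCN_SESSION_SIZE * 40 sessions */
}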
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_vf_error.h" +#include "mxgpu_ai.h" + +#define AMDGPU_VF_ERROR_ENTRY_SIZE 16 + +/* struct error_entry - amdgpu VF error information. */ +struct amdgpu_vf_error_buffer { + int read_count; + int write_count; + uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE]; + uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE]; + uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; +}; + +struct amdgpu_vf_error_buffer admgpu_vf_errors; + + +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data) +{ + int index; + uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code); + + index = admgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE; + admgpu_vf_errors.code [index] = error_code; + admgpu_vf_errors.flags [index] = error_flags; + admgpu_vf_errors.data [index] = error_data; + admgpu_vf_errors.write_count ++; +} + + +void amdgpu_vf_error_trans_all(struct amdgpu_device *adev) +{ + /* u32 pf2vf_flags = 0; */ + u32 data1, data2, data3; + int index; + + if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) { + return; + } +/* + TODO: Enable these code when pv2vf_info is merged + AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, &pf2vf_flags); + if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) { + return; + } +*/ + /* The errors are overlay of array, correct read_count as full. */ + if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) { + admgpu_vf_errors.read_count = admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE; + } + + while (admgpu_vf_errors.read_count < admgpu_vf_errors.write_count) { + index =admgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE; + data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]); + data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF; + data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF; + + adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3); + admgpu_vf_errors.read_count ++; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h new file mode 100644 index 000000000000..2a3278ec76ba --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h @@ -0,0 +1,62 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
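/*
 * Illustrative sketch (not part of the patch): the VF error log above is a
 * lossy 16-entry ring.  The writer never blocks and simply overwrites the
 * oldest record; the reader first clamps its cursor so it only ever sees the
 * most recent AMDGPU_VF_ERROR_ENTRY_SIZE records, then drains up to the write
 * cursor.  Plain C, hypothetical names.
 */
#define SKETCH_ENTRIES 16

struct sketch_ring {
        int read_count;
        int write_count;
        unsigned short code[SKETCH_ENTRIES];
};

static void sketch_ring_put(struct sketch_ring *r, unsigned short code)
{
        r->code[r->write_count % SKETCH_ENTRIES] = code;        /* overwrite oldest */
        r->write_count++;
}

static int sketch_ring_get(struct sketch_ring *r, unsigned short *code)
{
        if (r->write_count - r->read_count > SKETCH_ENTRIES)
                r->read_count = r->write_count - SKETCH_ENTRIES; /* skip overwritten slots */
        if (r->read_count >= r->write_count)
                return 0;                                        /* nothing left to read */
        *code = r->code[r->read_count % SKETCH_ENTRIES];
        r->read_count++;
        return 1;
}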
+ * + */ + +#ifndef __VF_ERROR_H__ +#define __VF_ERROR_H__ + +#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c,f) (((c & 0xFFFF) << 16) | (f & 0xFFFF)) +#define AMDGIM_ERROR_CODE(t,c) (((t&0xF)<<12)|(c&0xFFF)) + +/* Please keep enum same as AMD GIM driver */ +enum AMDGIM_ERROR_VF { + AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL = 0, + AMDGIM_ERROR_VF_NO_VBIOS, + AMDGIM_ERROR_VF_GPU_POST_ERROR, + AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, + AMDGIM_ERROR_VF_FENCE_INIT_FAIL, + + AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, + AMDGIM_ERROR_VF_IB_INIT_FAIL, + AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, + AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, + AMDGIM_ERROR_VF_GPU_RESET_FAIL, + + AMDGIM_ERROR_VF_TEST, + AMDGIM_ERROR_VF_MAX +}; + +enum AMDGIM_ERROR_CATEGORY { + AMDGIM_ERROR_CATEGORY_NON_USED = 0, + AMDGIM_ERROR_CATEGORY_GIM, + AMDGIM_ERROR_CATEGORY_PF, + AMDGIM_ERROR_CATEGORY_VF, + AMDGIM_ERROR_CATEGORY_VBIOS, + AMDGIM_ERROR_CATEGORY_MONITOR, + + AMDGIM_ERROR_CATEGORY_MAX +}; + +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data); +void amdgpu_vf_error_trans_all (struct amdgpu_device *adev); + +#endif /* __VF_ERROR_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 6bf5cea294f2..ab05121b9272 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" +#define MAX_KIQ_REG_WAIT 100000 int amdgpu_allocate_static_csa(struct amdgpu_device *adev) { @@ -45,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) * address within META_DATA init package to support SRIOV gfx preemption. */ -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va) { - int r; - struct amdgpu_bo_va *bo_va; struct ww_acquire_ctx ticket; struct list_head list; struct amdgpu_bo_list_entry pd; struct ttm_validate_buffer csa_tv; + int r; INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&csa_tv.head); @@ -68,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; } - bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); - if (!bo_va) { + *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); + if (!*bo_va) { ttm_eu_backoff_reservation(&ticket, &list); DRM_ERROR("failed to create bo_va for static CSA\n"); return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR, - AMDGPU_CSA_SIZE); + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR, + AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, bo_va); + amdgpu_vm_bo_rmv(adev, *bo_va); ttm_eu_backoff_reservation(&ticket, &list); return r; } - r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, bo_va); + amdgpu_vm_bo_rmv(adev, *bo_va); ttm_eu_backoff_reservation(&ticket, &list); return r; } - vm->csa_bo_va = bo_va; ttm_eu_backoff_reservation(&ticket, &list); return 0; } @@ -105,8 +105,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable virtual display */ adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; + 
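/*
 * Illustrative sketch (not part of the patch): the two packing macros above,
 * written out as functions with a worked value.  Category VF (3) combined with
 * sub-code AMDGIM_ERROR_VF_GPU_RESET_FAIL (9) gives 0x3009, which lands in the
 * upper half of the mailbox word next to the 16-bit flags.
 */
#include <stdint.h>

static uint16_t sketch_error_code(uint16_t category, uint16_t sub_code)
{
        return ((category & 0xF) << 12) | (sub_code & 0xFFF);
}

static uint32_t sketch_mailbox_word(uint16_t code, uint16_t flags)
{
        return ((uint32_t)(code & 0xFFFF) << 16) | (flags & 0xFFFF);
}

/* sketch_error_code(3, 9) == 0x3009; sketch_mailbox_word(0x3009, 0) == 0x30090000 */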
adev->cg_flags = 0; + adev->pg_flags = 0; - mutex_init(&adev->virt.lock_kiq); mutex_init(&adev->virt.lock_reset); } @@ -120,17 +121,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&adev->virt.lock_kiq); + mutex_lock(&kiq->ring_mutex); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - mutex_unlock(&adev->virt.lock_kiq); + mutex_unlock(&kiq->ring_mutex); - r = dma_fence_wait(f, false); - if (r) - DRM_ERROR("wait for kiq fence error: %ld.\n", r); + r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); dma_fence_put(f); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return ~0; + } val = adev->wb.wb[adev->virt.reg_val_offs]; @@ -146,15 +149,15 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&adev->virt.lock_kiq); + mutex_lock(&kiq->ring_mutex); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - mutex_unlock(&adev->virt.lock_kiq); + mutex_unlock(&kiq->ring_mutex); - r = dma_fence_wait(f, false); - if (r) + r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); + if (r < 1) DRM_ERROR("wait for kiq fence error: %ld.\n", r); dma_fence_put(f); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index a8ed162cc0bc..afcfb8bcfb65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -43,6 +43,7 @@ struct amdgpu_virt_ops { int (*req_full_gpu)(struct amdgpu_device *adev, bool init); int (*rel_full_gpu)(struct amdgpu_device *adev, bool init); int (*reset_gpu)(struct amdgpu_device *adev); + void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); }; /* GPU virtualization */ @@ -52,7 +53,6 @@ struct amdgpu_virt { uint64_t csa_vmid0_addr; bool chained_ib_support; uint32_t reg_val_offs; - struct mutex lock_kiq; struct mutex lock_reset; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; @@ -90,14 +90,15 @@ static inline bool is_virtual_machine(void) struct amdgpu_vm; int amdgpu_allocate_static_csa(struct amdgpu_device *adev); -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8ecf82c5fe74..bd20ff018512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -77,8 +77,12 @@ struct amdgpu_pte_update_params { void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, 
uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); - /* indicate update pt or its shadow */ - bool shadow; + /* The next two are used during VM update by CPU + * DMA addresses to use for mapping + * Kernel pointer of PD/PT BO that needs to be updated + */ + dma_addr_t *pages_addr; + void *kptr; }; /* Helper to disable partial resident texture feature from a fence callback */ @@ -155,11 +159,18 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, */ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, int (*validate)(void *, struct amdgpu_bo *), - void *param) + void *param, bool use_cpu_for_update, + struct ttm_bo_global *glob) { unsigned i; int r; + if (use_cpu_for_update) { + r = amdgpu_bo_kmap(parent->bo, NULL); + if (r) + return r; + } + if (!parent->entries) return 0; @@ -173,11 +184,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, if (r) return r; + spin_lock(&glob->lru_lock); + ttm_bo_move_to_lru_tail(&entry->bo->tbo); + if (entry->bo->shadow) + ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); + spin_unlock(&glob->lru_lock); + /* * Recurse into the sub directory. This is harmless because we * have only a maximum of 5 layers. */ - r = amdgpu_vm_validate_level(entry, validate, param); + r = amdgpu_vm_validate_level(entry, validate, param, + use_cpu_for_update, glob); if (r) return r; } @@ -208,54 +226,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (num_evictions == vm->last_eviction_counter) return 0; - return amdgpu_vm_validate_level(&vm->root, validate, param); -} - -/** - * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. - */ -static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) -{ - unsigned i; - - if (!parent->entries) - return; - - for (i = 0; i <= parent->last_entry_used; ++i) { - struct amdgpu_vm_pt *entry = &parent->entries[i]; - - if (!entry->bo) - continue; - - ttm_bo_move_to_lru_tail(&entry->bo->tbo); - amdgpu_vm_move_level_in_lru(entry); - } + return amdgpu_vm_validate_level(&vm->root, validate, param, + vm->use_cpu_for_update, + adev->mman.bdev.glob); } /** - * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. 
- */ -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct ttm_bo_global *glob = adev->mman.bdev.glob; - - spin_lock(&glob->lru_lock); - amdgpu_vm_move_level_in_lru(&vm->root); - spin_unlock(&glob->lru_lock); -} - - /** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * * @adev: amdgpu_device pointer @@ -275,12 +251,15 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, adev->vm_manager.block_size; unsigned pt_idx, from, to; int r; + u64 flags; + uint64_t init_value = 0; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); - parent->entries = drm_calloc_large(num_entries, - sizeof(struct amdgpu_vm_pt)); + parent->entries = kvmalloc_array(num_entries, + sizeof(struct amdgpu_vm_pt), + GFP_KERNEL | __GFP_ZERO); if (!parent->entries) return -ENOMEM; memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); @@ -299,6 +278,20 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + + if (vm->pte_support_ats) { + init_value = AMDGPU_PTE_SYSTEM; + if (level != adev->vm_manager.num_level - 1) + init_value |= AMDGPU_PDE_PTE; + } + /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.bo->tbo.resv; @@ -310,14 +303,19 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, - NULL, resv, &pt); + flags, + NULL, resv, init_value, &pt); if (r) return r; + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(pt, NULL); + if (r) { + amdgpu_bo_unref(&pt); + return r; + } + } + /* Keep a reference to the root directory to avoid * freeing them up in the wrong order. 
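/*
 * Illustrative sketch (not part of the patch): the per-level choices made in
 * amdgpu_vm_alloc_levels() above.  CPU-updated page tables must stay CPU
 * accessible and need no shadow copy; with ATS support the cleared entries
 * start out as "system" mappings, and every non-leaf level additionally marks
 * the entry as a PDE pointing at a PTE block.  Kernel context assumed for the
 * flag names (amdgpu headers); hypothetical helper names.
 */
static uint64_t sketch_pt_create_flags(bool cpu_update)
{
        uint64_t flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
                         AMDGPU_GEM_CREATE_VRAM_CLEARED;

        if (cpu_update)
                flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        else
                flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
                         AMDGPU_GEM_CREATE_SHADOW;
        return flags;
}

static uint64_t sketch_pt_init_value(bool ats, bool leaf_level)
{
        uint64_t init = 0;

        if (ats) {
                init = AMDGPU_PTE_SYSTEM;
                if (!leaf_level)
                        init |= AMDGPU_PDE_PTE;
        }
        return init;
}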
*/ @@ -391,6 +389,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, atomic_read(&adev->gpu_reset_counter); } +static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) +{ + return !!vm->reserved_vmid[vmhub]; +} + +/* idr_mgr->lock must be held */ +static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct dma_fence *updates = sync->last_vm_update; + int r = 0; + struct dma_fence *flushed, *tmp; + bool needs_flush = vm->use_cpu_for_update; + + flushed = id->flushed_updates; + if ((amdgpu_vm_had_gpu_reset(adev, id)) || + (atomic64_read(&id->owner) != vm->client_id) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) || + (!id->last_flush || (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush)))) { + needs_flush = true; + /* to prevent one context starved by another context */ + id->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&id->active, ring); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp); + return r; + } + } + + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence); + if (r) + goto out; + + if (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + id->pd_gpu_addr = job->vm_pd_addr; + atomic64_set(&id->owner, vm->client_id); + job->vm_needs_flush = needs_flush; + if (needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } + job->vm_id = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); +out: + return r; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -415,12 +478,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; + mutex_lock(&id_mgr->lock); + if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { + r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); + mutex_unlock(&id_mgr->lock); + return r; + } fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) + if (!fences) { + mutex_unlock(&id_mgr->lock); return -ENOMEM; - - mutex_lock(&id_mgr->lock); - + } /* Check if we have an idle VMID */ i = 0; list_for_each_entry(idle, &id_mgr->ids_lru, list) { @@ -462,11 +530,11 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } kfree(fences); - job->vm_needs_flush = false; + job->vm_needs_flush = vm->use_cpu_for_update; /* Check if we can use a VMID already assigned to this VM */ list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { struct dma_fence *flushed; - bool needs_flush = false; + bool needs_flush = vm->use_cpu_for_update; /* Check all the prerequisites to using this VMID */ if (amdgpu_vm_had_gpu_reset(adev, id)) @@ -521,7 +589,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id->pd_gpu_addr = job->vm_pd_addr; dma_fence_put(id->flushed_updates); id->flushed_updates = dma_fence_get(updates); - 
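/*
 * Illustrative sketch (not part of the patch): the conditions under which the
 * reserved-VMID path above cannot reuse its VMID without a flush, reduced to
 * booleans.  CPU-updated VMs always flush; otherwise any stale piece of
 * per-VMID state forces one.  Plain C, hypothetical names.
 */
#include <stdbool.h>

static bool sketch_reserved_vmid_needs_flush(bool cpu_update_vm,
                                             bool had_gpu_reset,
                                             bool owner_changed,
                                             bool pd_addr_changed,
                                             bool pd_updates_newer,     /* updates vs. flushed_updates  */
                                             bool last_flush_untrusted) /* wrong context / not signaled */
{
        return cpu_update_vm || had_gpu_reset || owner_changed ||
               pd_addr_changed || pd_updates_newer || last_flush_untrusted;
}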
id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); atomic64_set(&id->owner, vm->client_id); needs_flush: @@ -540,40 +607,118 @@ error: return r; } -static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) +static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + atomic_dec(&id_mgr->reserved_vmid_num); + } + mutex_unlock(&id_mgr->lock); +} + +static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr; + struct amdgpu_vm_id *idle; + int r = 0; + + id_mgr = &adev->vm_manager.id_mgr[vmhub]; + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (atomic_inc_return(&id_mgr->reserved_vmid_num) > + AMDGPU_VM_MAX_RESERVED_VMID) { + DRM_ERROR("Over limitation of reserved vmid\n"); + atomic_dec(&id_mgr->reserved_vmid_num); + r = -EINVAL; + goto unlock; + } + /* Select the first entry VMID */ + idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); + list_del_init(&idle->list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(&id_mgr->lock); + + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; +} + +/** + * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug + * + * @adev: amdgpu_device pointer + */ +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) { - struct amdgpu_device *adev = ring->adev; const struct amdgpu_ip_block *ip_block; + bool has_compute_vm_bug; + struct amdgpu_ring *ring; + int i; - if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) - /* only compute rings */ - return false; + has_compute_vm_bug = false; ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); - if (!ip_block) - return false; + if (ip_block) { + /* Compute has a VM bug for GFX version < 7. 
+ Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ + if (ip_block->version->major <= 7) + has_compute_vm_bug = true; + else if (ip_block->version->major == 8) + if (adev->gfx.mec_fw_version < 673) + has_compute_vm_bug = true; + } - if (ip_block->version->major <= 7) { - /* gfx7 has no workaround */ - return true; - } else if (ip_block->version->major == 8) { - if (adev->gfx.mec_fw_version >= 673) - /* gfx8 is fixed in MEC firmware 673 */ - return false; + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + /* only compute rings */ + ring->has_compute_vm_bug = has_compute_vm_bug; else - return true; + ring->has_compute_vm_bug = false; } - return false; } -static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job) { - u64 addr = mc_addr; + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vm_id *id; + bool gds_switch_needed; + bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; + + if (job->vm_id == 0) + return false; + id = &id_mgr->ids[job->vm_id]; + gds_switch_needed = ring->funcs->emit_gds_switch && ( + id->gds_base != job->gds_base || + id->gds_size != job->gds_size || + id->gws_base != job->gws_base || + id->gws_size != job->gws_size || + id->oa_base != job->oa_base || + id->oa_size != job->oa_size); + + if (amdgpu_vm_had_gpu_reset(adev, id)) + return true; - if (adev->gart.gart_funcs->adjust_mc_addr) - addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); + return vm_flush_needed || gds_switch_needed; +} - return addr; +static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) +{ + return (adev->mc.real_vram_size == adev->mc.visible_vram_size); } /** @@ -585,7 +730,7 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) * * Emit a VM flush when it is necessary. 
*/ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -598,8 +743,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) id->gws_size != job->gws_size || id->oa_base != job->oa_base || id->oa_size != job->oa_size); - bool vm_flush_needed = job->vm_needs_flush || - amdgpu_vm_ring_has_compute_vm_bug(ring); + bool vm_flush_needed = job->vm_needs_flush; unsigned patch_offset = 0; int r; @@ -608,21 +752,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) vm_flush_needed = true; } - if (!vm_flush_needed && !gds_switch_needed) + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) + if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && vm_flush_needed) { - u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); struct dma_fence *fence; - trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); + trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -631,6 +774,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); id->last_flush = fence; + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } @@ -718,8 +862,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, { struct amdgpu_bo_va *bo_va; - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { + list_for_each_entry(bo_va, &bo->va, base.bo_list) { + if (bo_va->base.vm == vm) { return bo_va; } } @@ -805,6 +949,52 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } +/** + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU + * + * @params: see amdgpu_pte_update_params definition + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + uint64_t value; + + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + + for (i = 0; i < count; i++) { + value = params->pages_addr ? 
+ amdgpu_vm_map_gart(params->pages_addr, addr) : + addr; + amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); + addr += incr; + } +} + +static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, + void *owner) +{ + struct amdgpu_sync sync; + int r; + + amdgpu_sync_create(&sync); + amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); + r = amdgpu_sync_wait(&sync, true); + amdgpu_sync_free(&sync); + + return r; +} + /* * amdgpu_vm_update_level - update a single level in the hierarchy * @@ -821,11 +1011,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, unsigned level) { struct amdgpu_bo *shadow; - struct amdgpu_ring *ring; - uint64_t pd_addr, shadow_addr; + struct amdgpu_ring *ring = NULL; + uint64_t pd_addr, shadow_addr = 0; uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; - unsigned count = 0, pt_idx, ndw; + unsigned count = 0, pt_idx, ndw = 0; struct amdgpu_job *job; struct amdgpu_pte_update_params params; struct dma_fence *fence = NULL; @@ -834,34 +1024,45 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (!parent->entries) return 0; - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* padding, etc. */ - ndw = 64; + memset(&params, 0, sizeof(params)); + params.adev = adev; + shadow = parent->bo->shadow; - /* assume the worst case */ - ndw += parent->last_entry_used * 6; + if (vm->use_cpu_for_update) { + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); + if (unlikely(r)) + return r; - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + params.func = amdgpu_vm_cpu_set_ptes; + } else { + ring = container_of(vm->entity.sched, struct amdgpu_ring, + sched); - shadow = parent->bo->shadow; - if (shadow) { - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + /* padding, etc.
*/ + ndw = 64; + + /* assume the worst case */ + ndw += parent->last_entry_used * 6; + + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + + if (shadow) { + shadow_addr = amdgpu_bo_gpu_offset(shadow); + ndw *= 2; + } else { + shadow_addr = 0; + } + + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); if (r) return r; - shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { - shadow_addr = 0; - } - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; + params.ib = &job->ibs[0]; + params.func = amdgpu_vm_do_set_ptes; + } - memset(&params, 0, sizeof(params)); - params.adev = adev; - params.ib = &job->ibs[0]; /* walk over the address space and update the directory */ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { @@ -871,20 +1072,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (bo == NULL) continue; - if (bo->shadow) { - struct amdgpu_bo *pt_shadow = bo->shadow; - - r = amdgpu_ttm_bind(&pt_shadow->tbo, - &pt_shadow->tbo.mem); - if (r) - return r; - } - pt = amdgpu_bo_gpu_offset(bo); - if (parent->entries[pt_idx].addr == pt) + pt = amdgpu_gart_get_vm_pde(adev, pt); + /* Don't update huge pages here */ + if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || + parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) continue; - parent->entries[pt_idx].addr = pt; + parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || @@ -892,19 +1087,16 @@ (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { - uint64_t pt_addr = - amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (shadow) - amdgpu_vm_do_set_ptes(&params, - last_shadow, - pt_addr, count, - incr, - AMDGPU_PTE_VALID); - - amdgpu_vm_do_set_ptes(&params, last_pde, - pt_addr, count, incr, - AMDGPU_PTE_VALID); + params.func(&params, + last_shadow, + last_pt, count, + incr, + AMDGPU_PTE_VALID); + + params.func(&params, last_pde, + last_pt, count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -917,36 +1109,37 @@ } if (count) { - uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (vm->root.bo->shadow) - amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(&params, last_shadow, last_pt, + count, incr, AMDGPU_PTE_VALID); - amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(&params, last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); } - if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); - if (shadow) - amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, + if (!vm->use_cpu_for_update) { + if (params.ib->length_dw == 0) { + amdgpu_job_free(job); + } else { + amdgpu_ring_pad_ib(ring, params.ib); + amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + if (shadow) + amdgpu_sync_resv(adev, &job->sync, + shadow->tbo.resv, + AMDGPU_FENCE_OWNER_VM); + + WARN_ON(params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); + if (r) + goto error_free; - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &fence); - if (r) - goto error_free; - - amdgpu_bo_fence(parent->bo, fence, true); - dma_fence_put(vm->last_dir_update); - vm->last_dir_update = dma_fence_get(fence); -
dma_fence_put(fence); + amdgpu_bo_fence(parent->bo, fence, true); + dma_fence_put(vm->last_dir_update); + vm->last_dir_update = dma_fence_get(fence); + dma_fence_put(fence); + } } /* * Recurse into the subdirectories. This recursion is harmless because @@ -971,6 +1164,32 @@ error_free: } /* + * amdgpu_vm_invalidate_level - mark all PD levels as invalid + * + * @parent: parent PD + * + * Mark all PD level as invalid after an error. + */ +static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) +{ + unsigned pt_idx; + + /* + * Recurse into the subdirectories. This recursion is harmless because + * we only have a maximum of 5 layers. + */ + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + + if (!entry->bo) + continue; + + entry->addr = ~0ULL; + amdgpu_vm_invalidate_level(entry); + } +} + +/* * amdgpu_vm_update_directories - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -982,33 +1201,118 @@ error_free: int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - return amdgpu_vm_update_level(adev, vm, &vm->root, 0); + int r; + + r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); + if (r) + amdgpu_vm_invalidate_level(&vm->root); + + if (vm->use_cpu_for_update) { + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(adev, 0); + } + + return r; } /** - * amdgpu_vm_find_pt - find the page table for an address + * amdgpu_vm_find_entry - find the entry for an address * * @p: see amdgpu_pte_update_params definition * @addr: virtual address in question + * @entry: resulting entry or NULL + * @parent: parent entry * - * Find the page table BO for a virtual address, return NULL when none found. + * Find the vm_pt entry and it's parent for the given address. */ -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, - uint64_t addr) +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, + struct amdgpu_vm_pt **entry, + struct amdgpu_vm_pt **parent) { - struct amdgpu_vm_pt *entry = &p->vm->root; unsigned idx, level = p->adev->vm_manager.num_level; - while (entry->entries) { + *parent = NULL; + *entry = &p->vm->root; + while ((*entry)->entries) { idx = addr >> (p->adev->vm_manager.block_size * level--); - idx %= amdgpu_bo_size(entry->bo) / 8; - entry = &entry->entries[idx]; + idx %= amdgpu_bo_size((*entry)->bo) / 8; + *parent = *entry; + *entry = &(*entry)->entries[idx]; } if (level) - return NULL; + *entry = NULL; +} + +/** + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * + * @p: see amdgpu_pte_update_params definition + * @entry: vm_pt entry to check + * @parent: parent entry + * @nptes: number of PTEs updated with this operation + * @dst: destination address where the PTEs should point to + * @flags: access flags fro the PTEs + * + * Check if we can update the PD with a huge page. 
+ */ +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, + struct amdgpu_vm_pt *entry, + struct amdgpu_vm_pt *parent, + unsigned nptes, uint64_t dst, + uint64_t flags) +{ + bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); + uint64_t pd_addr, pde; + + /* In the case of a mixed PT the PDE must point to it*/ + if (p->adev->asic_type < CHIP_VEGA10 || + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || + p->src || + !(flags & AMDGPU_PTE_VALID)) { + + dst = amdgpu_bo_gpu_offset(entry->bo); + dst = amdgpu_gart_get_vm_pde(p->adev, dst); + flags = AMDGPU_PTE_VALID; + } else { + /* Set the huge page flag to stop scanning at this PDE */ + flags |= AMDGPU_PDE_PTE; + } + + if (entry->addr == (dst | flags)) + return; + + entry->addr = (dst | flags); + + if (use_cpu_update) { + /* In case a huge page is replaced with a system + * memory mapping, p->pages_addr != NULL and + * amdgpu_vm_cpu_set_ptes would try to translate dst + * through amdgpu_vm_map_gart. But dst is already a + * GPU address (of the page table). Disable + * amdgpu_vm_map_gart temporarily. + */ + dma_addr_t *tmp; - return entry->bo; + tmp = p->pages_addr; + p->pages_addr = NULL; + + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); + + p->pages_addr = tmp; + } else { + if (parent->bo->shadow) { + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } } /** @@ -1022,92 +1326,59 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, * @flags: mapping flags * * Update the page tables in the range @start - @end. + * Returns 0 for success, -EINVAL for failure. 
*/ -static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { struct amdgpu_device *adev = params->adev; const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; - uint64_t cur_pe_start, cur_nptes, cur_dst; - uint64_t addr; /* next GPU address to be updated */ + uint64_t addr, pe_start; struct amdgpu_bo *pt; - unsigned nptes; /* next number of ptes to be updated */ - uint64_t next_pe_start; - - /* initialize the variables */ - addr = start; - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return; - } - - if (params->shadow) { - if (!pt->shadow) - return; - pt = pt->shadow; - } - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - - cur_pe_start = amdgpu_bo_gpu_offset(pt); - cur_pe_start += (addr & mask) * 8; - cur_nptes = nptes; - cur_dst = dst; - - /* for next ptb*/ - addr += nptes; - dst += nptes * AMDGPU_GPU_PAGE_SIZE; + unsigned nptes; + bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); /* walk over the address space and update the page tables */ - while (addr < end) { - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return; - } + for (addr = start; addr < end; addr += nptes, + dst += nptes * AMDGPU_GPU_PAGE_SIZE) { + struct amdgpu_vm_pt *entry, *parent; - if (params->shadow) { - if (!pt->shadow) - return; - pt = pt->shadow; - } + amdgpu_vm_get_entry(params, addr, &entry, &parent); + if (!entry) + return -ENOENT; if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - next_pe_start = amdgpu_bo_gpu_offset(pt); - next_pe_start += (addr & mask) * 8; + amdgpu_vm_handle_huge_pages(params, entry, parent, + nptes, dst, flags); + /* We don't need to update PTEs for huge pages */ + if (entry->addr & AMDGPU_PDE_PTE) + continue; - if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && - ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { - /* The next ptb is consecutive to current ptb. - * Don't call the update function now. - * Will update two ptbs together in future. - */ - cur_nptes += nptes; + pt = entry->bo; + if (use_cpu_update) { + pe_start = (unsigned long)amdgpu_bo_kptr(pt); } else { - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - - cur_pe_start = next_pe_start; - cur_nptes = nptes; - cur_dst = dst; + if (pt->shadow) { + pe_start = amdgpu_bo_gpu_offset(pt->shadow); + pe_start += (addr & mask) * 8; + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + } + pe_start = amdgpu_bo_gpu_offset(pt); } - /* for next ptb*/ - addr += nptes; - dst += nptes * AMDGPU_GPU_PAGE_SIZE; + pe_start += (addr & mask) * 8; + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); } - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); + return 0; } /* @@ -1119,11 +1390,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @end: last PTE to handle * @dst: addr those PTEs should point to * @flags: hw mapping flags + * Returns 0 for success, -EINVAL for failure. 
*/ -static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { + int r; + /** * The MC L1 TLB supports variable sized pages, based on a fragment * field in the PTE. When this field is set to a non-zero value, page @@ -1142,38 +1416,39 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * Userspace can support this by aligning virtual base address and * allocation size to the fragment size. */ - - /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); - uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + unsigned pages_per_frag = params->adev->vm_manager.fragment_size; + uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); + uint64_t frag_align = 1 << pages_per_frag; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); /* system pages are non continuously */ if (params->src || !(flags & AMDGPU_PTE_VALID) || - (frag_start >= frag_end)) { - - amdgpu_vm_update_ptes(params, start, end, dst, flags); - return; - } + (frag_start >= frag_end)) + return amdgpu_vm_update_ptes(params, start, end, dst, flags); /* handle the 4K area at the beginning */ if (start != frag_start) { - amdgpu_vm_update_ptes(params, start, frag_start, - dst, flags); + r = amdgpu_vm_update_ptes(params, start, frag_start, + dst, flags); + if (r) + return r; dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; } /* handle the area in the middle */ - amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, - flags | frag_flags); + r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, + flags | frag_flags); + if (r) + return r; /* handle the 4K area at the end */ if (frag_end != end) { dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; - amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); + r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); } + return r; } /** @@ -1215,12 +1490,30 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.vm = vm; params.src = src; - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* sync to everything on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_UNDEFINED; + if (vm->use_cpu_for_update) { + /* params.src is used as flag to indicate system Memory */ + if (pages_addr) + params.src = ~0; + + /* Wait for PT BOs to be free. PTs share the same resv. object + * as the root PD BO + */ + r = amdgpu_vm_wait_pd(adev, vm, owner); + if (unlikely(r)) + return r; + + params.func = amdgpu_vm_cpu_set_ptes; + params.pages_addr = pages_addr; + return amdgpu_vm_frag_ptes(&params, start, last + 1, + addr, flags); + } + + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + nptes = last - start + 1; /* @@ -1232,6 +1525,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc.
*/ ndw = 64; + /* one PDE write for each huge page */ + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; + if (src) { /* only copy commands needed */ ndw += ncmds * 7; @@ -1293,10 +1589,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - params.shadow = true; - amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); - params.shadow = false; - amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + if (r) + goto error_free; amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); @@ -1312,6 +1607,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, error_free: amdgpu_job_free(job); + amdgpu_vm_invalidate_level(&vm->root); return r; } @@ -1320,7 +1616,6 @@ error_free: * * @adev: amdgpu_device pointer * @exclusive: fence we need to sync to - * @gtt_flags: flags as they are used for GTT * @pages_addr: DMA addresses to use for mapping * @vm: requested vm * @mapping: mapped range and flags to use for the update @@ -1334,7 +1629,6 @@ error_free: */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, - uint64_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, @@ -1389,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } if (pages_addr) { - if (flags == gtt_flags) - src = adev->gart.table_addr + - (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8; - else - max_entries = min(max_entries, 16ull * 1024ull); + max_entries = min(max_entries, 16ull * 1024ull); addr = 0; } else if (flags & AMDGPU_PTE_VALID) { addr += adev->vm_manager.vram_base_offset; @@ -1434,50 +1724,45 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear) { - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; - uint64_t gtt_flags, flags; struct ttm_mem_reg *mem; struct drm_mm_node *nodes; struct dma_fence *exclusive; + uint64_t flags; int r; - if (clear || !bo_va->bo) { + if (clear || !bo_va->base.bo) { mem = NULL; nodes = NULL; exclusive = NULL; } else { struct ttm_dma_tt *ttm; - mem = &bo_va->bo->tbo.mem; + mem = &bo_va->base.bo->tbo.mem; nodes = mem->mm_node; if (mem->mem_type == TTM_PL_TT) { - ttm = container_of(bo_va->bo->tbo.ttm, struct - ttm_dma_tt, ttm); + ttm = container_of(bo_va->base.bo->tbo.ttm, + struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); + exclusive = reservation_object_get_excl(bo->tbo.resv); } - if (bo_va->bo) { - flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && - adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ?
- flags : 0; - } else { + if (bo) + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); + else flags = 0x0; - gtt_flags = ~0x0; - } spin_lock(&vm->status_lock); - if (!list_empty(&bo_va->vm_status)) + if (!list_empty(&bo_va->base.vm_status)) list_splice_init(&bo_va->valids, &bo_va->invalids); spin_unlock(&vm->status_lock); list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_split_mapping(adev, exclusive, - gtt_flags, pages_addr, vm, + r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, mapping, flags, nodes, &bo_va->last_pt_update); if (r) @@ -1494,11 +1779,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); - list_del_init(&bo_va->vm_status); + list_del_init(&bo_va->base.vm_status); if (clear) - list_add(&bo_va->vm_status, &vm->cleared); + list_add(&bo_va->base.vm_status, &vm->cleared); spin_unlock(&vm->status_lock); + if (vm->use_cpu_for_update) { + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(adev, 0); + } + return 0; } @@ -1652,15 +1943,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; struct dma_fence *f = NULL; int r; + uint64_t init_pte_value = 0; while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); + if (vm->pte_support_ats) + init_pte_value = AMDGPU_PTE_SYSTEM; + r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, mapping->start, mapping->last, - 0, 0, &f); + init_pte_value, 0, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -1680,26 +1975,26 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, } /** - * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT + * amdgpu_vm_clear_moved - clear moved BOs in the PT * * @adev: amdgpu_device pointer * @vm: requested vm * - * Make sure all invalidated BOs are cleared in the PT. + * Make sure all moved BOs are cleared in the PT. * Returns 0 for success. * * PTs have to be reserved and mutex must be locked! 
*/ -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, - struct amdgpu_vm *vm, struct amdgpu_sync *sync) +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = NULL; int r = 0; spin_lock(&vm->status_lock); - while (!list_empty(&vm->invalidated)) { - bo_va = list_first_entry(&vm->invalidated, - struct amdgpu_bo_va, vm_status); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, + struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); r = amdgpu_vm_bo_update(adev, bo_va, true); @@ -1739,16 +2034,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo_va == NULL) { return NULL; } - bo_va->vm = vm; - bo_va->bo = bo; + bo_va->base.vm = vm; + bo_va->base.bo = bo; + INIT_LIST_HEAD(&bo_va->base.bo_list); + INIT_LIST_HEAD(&bo_va->base.vm_status); + bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - INIT_LIST_HEAD(&bo_va->vm_status); if (bo) - list_add_tail(&bo_va->bo_list, &bo->va); + list_add_tail(&bo_va->base.bo_list, &bo->va); return bo_va; } @@ -1773,7 +2069,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping, *tmp; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; /* validate the parameters */ @@ -1784,7 +2081,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -1794,7 +2091,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (tmp) { /* bo and tmp overlap, invalid addr */ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " - "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, + "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, tmp->start, tmp->last + 1); return -EINVAL; } @@ -1839,7 +2136,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; int r; @@ -1851,7 +2149,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; /* Allocate all the needed memory */ @@ -1859,7 +2157,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, if (!mapping) return -ENOMEM; - r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); + r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); if (r) { kfree(mapping); return r; @@ -1899,7 +2197,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, uint64_t saddr) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2047,12 +2345,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va) { struct amdgpu_bo_va_mapping *mapping, *next; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; - list_del(&bo_va->bo_list); + 
list_del(&bo_va->base.bo_list); spin_lock(&vm->status_lock); - list_del(&bo_va->vm_status); + list_del(&bo_va->base.vm_status); spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { @@ -2084,13 +2382,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo) { - struct amdgpu_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) - list_add(&bo_va->vm_status, &bo_va->vm->invalidated); - spin_unlock(&bo_va->vm->status_lock); + struct amdgpu_vm_bo_base *bo_base; + + list_for_each_entry(bo_base, &bo->va, bo_list) { + spin_lock(&bo_base->vm->status_lock); + if (list_empty(&bo_base->vm_status)) + list_add(&bo_base->vm_status, + &bo_base->vm->moved); + spin_unlock(&bo_base->vm->status_lock); } } @@ -2108,12 +2407,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) } /** - * amdgpu_vm_adjust_size - adjust vm size and block size + * amdgpu_vm_set_fragment_size - adjust fragment size in PTE + * + * @adev: amdgpu_device pointer + * @fragment_size_default: the default fragment size if it's set auto + */ +void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default) +{ + if (amdgpu_vm_fragment_size == -1) + adev->vm_manager.fragment_size = fragment_size_default; + else + adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; +} + +/** + * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size * * @adev: amdgpu_device pointer * @vm_size: the default vm size if it's set auto */ -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default) { /* adjust vm size firstly */ if (amdgpu_vm_size == -1) @@ -2128,8 +2441,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) else adev->vm_manager.block_size = amdgpu_vm_block_size; - DRM_INFO("vm size is %llu GB, block size is %u-bit\n", - adev->vm_manager.vm_size, adev->vm_manager.block_size); + amdgpu_vm_set_fragment_size(adev, fragment_size_default); + + DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", + adev->vm_manager.vm_size, adev->vm_manager.block_size, + adev->vm_manager.fragment_size); } /** @@ -2137,22 +2453,28 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) * * @adev: amdgpu_device pointer * @vm: requested vm + * @vm_context: Indicates if it GFX or Compute context * * Init @vm fields. 
*/ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; struct amdgpu_ring *ring; struct amd_sched_rq *rq; - int r; + int r, i; + u64 flags; + uint64_t init_pde_value = 0; - vm->va = RB_ROOT; + vm->va = RB_ROOT_CACHED; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); - INIT_LIST_HEAD(&vm->invalidated); + INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); @@ -2167,15 +2489,37 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; + vm->pte_support_ats = false; + + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_COMPUTE); + + if (adev->asic_type == CHIP_RAVEN) { + vm->pte_support_ats = true; + init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + } + } else + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_GFX); + DRM_DEBUG_DRIVER("VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); vm->last_dir_update = NULL; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, - NULL, NULL, &vm->root.bo); + flags, + NULL, NULL, init_pde_value, &vm->root.bo); if (r) goto error_free_sched_entity; @@ -2184,6 +2528,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) goto error_free_root; vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(vm->root.bo, NULL); + if (r) + goto error_free_root; + } + amdgpu_bo_unreserve(vm->root.bo); return 0; @@ -2219,7 +2570,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) for (i = 0; i <= level->last_entry_used; i++) amdgpu_vm_free_levels(&level->entries[i]); - drm_free_large(level->entries); + kvfree(level->entries); } /** @@ -2235,13 +2586,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + int i; amd_sched_entity_fini(vm->entity.sched, &vm->entity); - if (!RB_EMPTY_ROOT(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { + rbtree_postorder_for_each_entry_safe(mapping, tmp, + &vm->va.rb_root, rb) { list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); @@ -2258,6 +2611,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_levels(&vm->root); dma_fence_put(vm->last_dir_update); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + amdgpu_vm_free_reserved_vmid(adev, vm, i); } /** @@ 
-2277,6 +2632,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); + atomic_set(&id_mgr->reserved_vmid_num, 0); /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { @@ -2295,6 +2651,23 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); + + /* If not overridden by the user, by default, only in large BAR systems + * Compute VM tables will be updated by CPU + */ +#ifdef CONFIG_X86_64 + if (amdgpu_vm_update_mode == -1) { + if (amdgpu_vm_is_large_bar(adev)) + adev->vm_manager.vm_update_mode = + AMDGPU_VM_USE_CPU_FOR_COMPUTE; + else + adev->vm_manager.vm_update_mode = 0; + } else + adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; +#else + adev->vm_manager.vm_update_mode = 0; +#endif + } /** @@ -2322,3 +2695,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) } } } + +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + union drm_amdgpu_vm *args = data; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + int r; + + switch (args->in.op) { + case AMDGPU_VM_OP_RESERVE_VMID: + /* current, we only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, + AMDGPU_GFXHUB); + if (r) + return r; + break; + case AMDGPU_VM_OP_UNRESERVE_VMID: + amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index e1d951ece433..6716355403ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -50,9 +50,6 @@ struct amdgpu_bo_list_entry; /* PTBs (Page Table Blocks) need to be aligned to 32K */ #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 -/* LOG2 number of continuous pages for the fragment field */ -#define AMDGPU_LOG2_PAGES_PER_FRAG 4 - #define AMDGPU_PTE_VALID (1ULL << 0) #define AMDGPU_PTE_SYSTEM (1ULL << 1) #define AMDGPU_PTE_SNOOPED (1ULL << 2) @@ -68,6 +65,9 @@ struct amdgpu_bo_list_entry; /* TILED for VEGA10, reserved for older ASICs */ #define AMDGPU_PTE_PRT (1ULL << 51) +/* PDE is handled as PTE for VEGA10 */ +#define AMDGPU_PDE_PTE (1ULL << 54) + /* VEGA10 only */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) @@ -84,6 +84,28 @@ struct amdgpu_bo_list_entry; /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (8 << 20) +/* max vmids dedicated for process */ +#define AMDGPU_VM_MAX_RESERVED_VMID 1 + +#define AMDGPU_VM_CONTEXT_GFX 0 +#define AMDGPU_VM_CONTEXT_COMPUTE 1 + +/* See vm_update_mode */ +#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) +#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) + +/* base structure for tracking BO usage in a VM */ +struct amdgpu_vm_bo_base { + /* constant after initialization */ + struct amdgpu_vm *vm; + struct amdgpu_bo *bo; + + /* protected by bo being reserved */ + struct list_head bo_list; + + /* protected by spinlock */ + struct list_head vm_status; +}; struct amdgpu_vm_pt { struct amdgpu_bo *bo; @@ -96,13 +118,13 @@ struct amdgpu_vm_pt { struct amdgpu_vm { /* tree of virtual addresses mapped */ - struct rb_root va; + struct rb_root_cached va; /* protecting invalidated */ spinlock_t status_lock; /* BOs moved, but not yet 
updated in the PT */ - struct list_head invalidated; + struct list_head moved; /* BOs cleared in the PT because of a move */ struct list_head cleared; @@ -123,8 +145,14 @@ struct amdgpu_vm { /* client id */ u64 client_id; - /* each VM will map on CSA */ - struct amdgpu_bo_va *csa_bo_va; + /* dedicated to vm */ + struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; + + /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ + bool use_cpu_for_update; + + /* Flag to indicate ATS support from PTE for GFX9 */ + bool pte_support_ats; }; struct amdgpu_vm_id { @@ -152,6 +180,7 @@ struct amdgpu_vm_id_manager { unsigned num_ids; struct list_head ids_lru; struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; + atomic_t reserved_vmid_num; }; struct amdgpu_vm_manager { @@ -166,10 +195,9 @@ struct amdgpu_vm_manager { uint32_t num_level; uint64_t vm_size; uint32_t block_size; + uint32_t fragment_size; /* vram base address for page table entry */ u64 vram_base_offset; - /* is vm enabled? */ - bool enabled; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; @@ -181,11 +209,18 @@ struct amdgpu_vm_manager { /* partial resident texture handling */ spinlock_t prt_lock; atomic_t num_prt_users; + + /* controls how VM page tables are updated for Graphics and Compute. + * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU + * BIT1[= 0] Compute updated by SDMA [= 1] by CPU + */ + int vm_update_mode; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, @@ -193,15 +228,13 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, - struct amdgpu_vm *vm); int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size); int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct dma_fence *fence, struct amdgpu_job *job); -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, unsigned vmid); void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); @@ -210,8 +243,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_sync *sync); +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); @@ -238,6 +271,13 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, uint64_t saddr, uint64_t size); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); +void amdgpu_vm_set_fragment_size(struct 
amdgpu_device *adev, + uint32_t fragment_size_default); +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, + uint32_t fragment_size_default); +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job); +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a2c59a08b2bd..26e900627971 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -28,6 +28,8 @@ struct amdgpu_vram_mgr { struct drm_mm mm; spinlock_t lock; + atomic64_t usage; + atomic64_t vis_usage; }; /** @@ -79,6 +81,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) } /** + * amdgpu_vram_mgr_vis_size - Calculate visible node size + * + * @adev: amdgpu device structure + * @node: MM node structure + * + * Calculate how many bytes of the MM node are inside visible VRAM + */ +static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, + struct drm_mm_node *node) +{ + uint64_t start = node->start << PAGE_SHIFT; + uint64_t end = (node->size + node->start) << PAGE_SHIFT; + + if (start >= adev->mc.visible_vram_size) + return 0; + + return (end > adev->mc.visible_vram_size ? + adev->mc.visible_vram_size : end) - start; +} + +/** * amdgpu_vram_mgr_new - allocate new ranges * * @man: TTM memory type manager @@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm *mm = &mgr->mm; struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; + uint64_t usage = 0, vis_usage = 0; unsigned i; int r; @@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) goto error; + usage += nodes[i].size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); + /* Calculate a virtual BO start address to easily check if * everything is CPU accessible. */ @@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, } spin_unlock(&mgr->lock); + atomic64_add(usage, &mgr->usage); + atomic64_add(vis_usage, &mgr->vis_usage); + mem->mm_node = nodes; return 0; @@ -181,8 +212,10 @@ error: static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm_node *nodes = mem->mm_node; + uint64_t usage = 0, vis_usage = 0; unsigned pages = mem->num_pages; if (!mem->mm_node) @@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, while (pages) { pages -= nodes->size; drm_mm_remove_node(nodes); + usage += nodes->size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); ++nodes; } spin_unlock(&mgr->lock); + atomic64_sub(usage, &mgr->usage); + atomic64_sub(vis_usage, &mgr->vis_usage); + kfree(mem->mm_node); mem->mm_node = NULL; } /** + * amdgpu_vram_mgr_usage - how many bytes are used in this domain + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in this domain. 
+ */ +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->usage); +} + +/** + * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in the visible part of VRAM + */ +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->vis_usage); +} + +/** * amdgpu_vram_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. */ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); + + drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", + man->size, amdgpu_vram_mgr_usage(man) >> 20, + amdgpu_vram_mgr_vis_usage(man) >> 20); } const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index ec93714e4524..cb508a211b2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/ci_smc.c b/drivers/gpu/drm/amd/amdgpu/ci_smc.c index 7eb9069db8e3..b8ba51e045b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 9d33e5641419..567c4a5cf90c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -24,7 +24,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -964,62 +964,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, } static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { - {mmGRBM_STATUS, false}, - {mmGB_ADDR_CONFIG, false}, - {mmMC_ARB_RAMCFG, false}, - {mmGB_TILE_MODE0, false}, - {mmGB_TILE_MODE1, false}, - {mmGB_TILE_MODE2, false}, - {mmGB_TILE_MODE3, false}, - {mmGB_TILE_MODE4, false}, - {mmGB_TILE_MODE5, false}, - {mmGB_TILE_MODE6, false}, - {mmGB_TILE_MODE7, false}, - {mmGB_TILE_MODE8, false}, - {mmGB_TILE_MODE9, false}, - {mmGB_TILE_MODE10, false}, - {mmGB_TILE_MODE11, false}, - {mmGB_TILE_MODE12, false}, - {mmGB_TILE_MODE13, false}, - {mmGB_TILE_MODE14, false}, - {mmGB_TILE_MODE15, false}, - {mmGB_TILE_MODE16, false}, - {mmGB_TILE_MODE17, false}, - {mmGB_TILE_MODE18, false}, - {mmGB_TILE_MODE19, false}, - {mmGB_TILE_MODE20, false}, - {mmGB_TILE_MODE21, false}, - {mmGB_TILE_MODE22, false}, - {mmGB_TILE_MODE23, false}, - {mmGB_TILE_MODE24, false}, - {mmGB_TILE_MODE25, false}, - {mmGB_TILE_MODE26, false}, - {mmGB_TILE_MODE27, false}, - {mmGB_TILE_MODE28, false}, - {mmGB_TILE_MODE29, false}, - {mmGB_TILE_MODE30, false}, - 
{mmGB_TILE_MODE31, false}, - {mmGB_MACROTILE_MODE0, false}, - {mmGB_MACROTILE_MODE1, false}, - {mmGB_MACROTILE_MODE2, false}, - {mmGB_MACROTILE_MODE3, false}, - {mmGB_MACROTILE_MODE4, false}, - {mmGB_MACROTILE_MODE5, false}, - {mmGB_MACROTILE_MODE6, false}, - {mmGB_MACROTILE_MODE7, false}, - {mmGB_MACROTILE_MODE8, false}, - {mmGB_MACROTILE_MODE9, false}, - {mmGB_MACROTILE_MODE10, false}, - {mmGB_MACROTILE_MODE11, false}, - {mmGB_MACROTILE_MODE12, false}, - {mmGB_MACROTILE_MODE13, false}, - {mmGB_MACROTILE_MODE14, false}, - {mmGB_MACROTILE_MODE15, false}, - {mmCC_RB_BACKEND_DISABLE, false, true}, - {mmGC_USER_RB_BACKEND_DISABLE, false, true}, - {mmGB_BACKEND_MAP, false, false}, - {mmPA_SC_RASTER_CONFIG, false, true}, - {mmPA_SC_RASTER_CONFIG_1, false, true}, + {mmGRBM_STATUS}, + {mmGB_ADDR_CONFIG}, + {mmMC_ARB_RAMCFG}, + {mmGB_TILE_MODE0}, + {mmGB_TILE_MODE1}, + {mmGB_TILE_MODE2}, + {mmGB_TILE_MODE3}, + {mmGB_TILE_MODE4}, + {mmGB_TILE_MODE5}, + {mmGB_TILE_MODE6}, + {mmGB_TILE_MODE7}, + {mmGB_TILE_MODE8}, + {mmGB_TILE_MODE9}, + {mmGB_TILE_MODE10}, + {mmGB_TILE_MODE11}, + {mmGB_TILE_MODE12}, + {mmGB_TILE_MODE13}, + {mmGB_TILE_MODE14}, + {mmGB_TILE_MODE15}, + {mmGB_TILE_MODE16}, + {mmGB_TILE_MODE17}, + {mmGB_TILE_MODE18}, + {mmGB_TILE_MODE19}, + {mmGB_TILE_MODE20}, + {mmGB_TILE_MODE21}, + {mmGB_TILE_MODE22}, + {mmGB_TILE_MODE23}, + {mmGB_TILE_MODE24}, + {mmGB_TILE_MODE25}, + {mmGB_TILE_MODE26}, + {mmGB_TILE_MODE27}, + {mmGB_TILE_MODE28}, + {mmGB_TILE_MODE29}, + {mmGB_TILE_MODE30}, + {mmGB_TILE_MODE31}, + {mmGB_MACROTILE_MODE0}, + {mmGB_MACROTILE_MODE1}, + {mmGB_MACROTILE_MODE2}, + {mmGB_MACROTILE_MODE3}, + {mmGB_MACROTILE_MODE4}, + {mmGB_MACROTILE_MODE5}, + {mmGB_MACROTILE_MODE6}, + {mmGB_MACROTILE_MODE7}, + {mmGB_MACROTILE_MODE8}, + {mmGB_MACROTILE_MODE9}, + {mmGB_MACROTILE_MODE10}, + {mmGB_MACROTILE_MODE11}, + {mmGB_MACROTILE_MODE12}, + {mmGB_MACROTILE_MODE13}, + {mmGB_MACROTILE_MODE14}, + {mmGB_MACROTILE_MODE15}, + {mmCC_RB_BACKEND_DISABLE, true}, + {mmGC_USER_RB_BACKEND_DISABLE, true}, + {mmGB_BACKEND_MAP, false}, + {mmPA_SC_RASTER_CONFIG, true}, + {mmPA_SC_RASTER_CONFIG_1, true}, }; static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, @@ -1050,11 +1050,10 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, if (reg_offset != cik_allowed_read_registers[i].reg_offset) continue; - if (!cik_allowed_read_registers[i].untouched) - *value = cik_allowed_read_registers[i].grbm_indexed ? - cik_read_indexed_register(adev, se_num, - sh_num, reg_offset) : - RREG32(reg_offset); + *value = cik_allowed_read_registers[i].grbm_indexed ? + cik_read_indexed_register(adev, se_num, + sh_num, reg_offset) : + RREG32(reg_offset); return 0; } return -EINVAL; @@ -1825,21 +1824,14 @@ static int cik_common_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_amdkfd_suspend(adev); - return cik_common_hw_fini(adev); } static int cik_common_resume(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = cik_common_hw_init(adev); - if (r) - return r; - - return amdgpu_amdkfd_resume(adev); + return cik_common_hw_init(adev); } static bool cik_common_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index c57c3f18af01..b8918432c572 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c216e16826c9..f508f4d01e4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -342,6 +342,63 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev) } /** + * cik_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (VI). + */ +static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl, phase_quantum = 0; + int i; + + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); + if (enable) { + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 1); + if (amdgpu_sdma_phase_quantum) { + WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i], + phase_quantum); + WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i], + phase_quantum); + } + } else { + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 0); + } + + WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl); + } +} + +/** * cik_sdma_enable - stop the async dma engines * * @adev: amdgpu_device pointer @@ -537,6 +594,8 @@ static int cik_sdma_start(struct amdgpu_device *adev) /* halt the engine before programing */ cik_sdma_enable(adev, false); + /* enable sdma ring preemption */ + cik_ctx_switch_enable(adev, true); /* start the gfx rings and rlc compute queues */ r = cik_sdma_gfx_resume(adev); @@ -984,6 +1043,7 @@ static int cik_sdma_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cik_ctx_switch_enable(adev, false); cik_sdma_enable(adev, false); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h index 18fd01f3e4b2..003a131bad47 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h @@ -1,24 +1,25 @@ - /* -*************************************************************************************************** -* -* Trade secret of Advanced Micro Devices, Inc. -* Copyright (c) 2010 Advanced Micro Devices, Inc. (unpublished) -* -* All rights reserved. This notice is intended as a precaution against inadvertent publication and -* does not imply publication or any waiver of confidentiality. The year included in the foregoing -* notice is the year of creation of the work. 
-* -*************************************************************************************************** -*/ -/** -*************************************************************************************************** -* @brief gfx9 Clearstate Definitions -*************************************************************************************************** -* -* Do not edit! This is a machine-generated file! -* -*/ + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ static const unsigned int gfx9_SECT_CONTEXT_def_1[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index a5f294ebff5c..0c1209cdd1cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5dffa27afa45..4e519dc42916 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
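The DCE hunks that follow (and the SDMA code above) lean heavily on REG_GET_FIELD()/REG_SET_FIELD() with the per-ASIC *_MASK / *__SHIFT constants. Purely for orientation, and not the driver's actual macro definitions, the read-modify-write they express is equivalent to the following open-coded form:

/* Orientation-only equivalents of the field accessors used throughout the
 * DCE/SDMA hunks; the real macros are token-pasting wrappers around the
 * generated register headers, not these helpers.
 */
static inline u32 field_get(u32 reg_val, u32 mask, u32 shift)
{
	return (reg_val & mask) >> shift;
}

static inline u32 field_set(u32 reg_val, u32 mask, u32 shift, u32 field_val)
{
	/* clear the field, then merge the new value in at its shift */
	return (reg_val & ~mask) | ((field_val << shift) & mask);
}

So, for example, REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0) leaves every other bit of tmp alone and zeroes only the VGA_VSTATUS_CNTL field.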
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -484,134 +484,6 @@ static bool dce_v10_0_is_display_hung(struct amdgpu_device *adev) return true; } -static void dce_v10_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp; - int i; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { -#if 0 - u32 frame_count; - int j; - - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) { - amdgpu_display_vblank_wait(adev, i); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - } - /* wait for the next frame */ - frame_count = amdgpu_display_vblank_get_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (amdgpu_display_vblank_get_counter(adev, i) != frame_count) - break; - udelay(1); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) { - tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1); - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) { - tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1); - WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp); - } -#else - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ -#endif - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v10_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp, frame_count; - int i, j; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - - if (save->crtc_enabled[i]) { - tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 0) { - tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 0); - WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) { - tmp = REG_SET_FIELD(tmp, 
GRPH_UPDATE, GRPH_UPDATE_LOCK, 0); - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) { - tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0); - WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp); - } - for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0) - break; - udelay(1); - } - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - /* wait for the next frame */ - frame_count = amdgpu_display_vblank_get_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (amdgpu_display_vblank_get_counter(adev, i) != frame_count) - break; - udelay(1); - } - } - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start)); - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); -} - static void dce_v10_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -1867,7 +1739,7 @@ static void dce_v10_0_afmt_setmode(struct drm_encoder *encoder, dce_v10_0_audio_write_sad_regs(encoder); dce_v10_0_audio_write_latency_fields(encoder, mode); - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -2267,6 +2139,7 @@ static void dce_v10_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; u32 tmp; @@ -2304,11 +2177,14 @@ static void dce_v10_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset); @@ -2555,7 +2431,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2563,7 +2439,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2597,7 +2473,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -2624,15 +2500,6 @@ static int dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 
*red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v10_0_crtc_load_lut(crtc); return 0; @@ -2844,14 +2711,12 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v10_0_crtc_set_base_atomic, .prepare = dce_v10_0_crtc_prepare, .commit = dce_v10_0_crtc_commit, - .load_lut = dce_v10_0_crtc_load_lut, .disable = dce_v10_0_crtc_disable, }; static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2869,12 +2734,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - switch (amdgpu_crtc->crtc_id) { case 0: default: @@ -3025,6 +2884,8 @@ static int dce_v10_0_hw_init(void *handle) dce_v10_0_init_golden_registers(adev); + /* disable vga render */ + dce_v10_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); @@ -3737,7 +3598,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { - .set_vga_render_state = &dce_v10_0_set_vga_render_state, .bandwidth_update = &dce_v10_0_bandwidth_update, .vblank_get_counter = &dce_v10_0_vblank_get_counter, .vblank_wait = &dce_v10_0_vblank_wait, @@ -3750,8 +3610,6 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v10_0_crtc_get_scanoutpos, .add_encoder = &dce_v10_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = &dce_v10_0_stop_mc_access, - .resume_mc_access = &dce_v10_0_resume_mc_access, }; static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 47bbc87f96d2..11edc75edaa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
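A note on the crtc_load_lut/gamma_set rework in the dce_v10_0.c hunk above (the same pattern repeats for DCE 11, 6 and 8 below): instead of caching an 8-bit copy of the gamma ramp in amdgpu_crtc->lut_r/g/b, the LUT is now filled directly from the 16-bit crtc->gamma_store, packing the top 10 bits of each channel into the 30-bit DC_LUT_30_COLOR word (red in bits 29:20, green in 19:10, blue in 9:0). A small sketch of that packing, with a hypothetical helper name:

/* Packs one 16-bit-per-channel gamma entry into the 10:10:10 layout written
 * to mmDC_LUT_30_COLOR in the hunks above and below. Name is illustrative.
 */
static u32 pack_lut_30_color(u16 r, u16 g, u16 b)
{
	return ((u32)(r & 0xffc0) << 14) |   /* red   bits 15:6 -> 29:20 */
	       ((u32)(g & 0xffc0) << 4)  |   /* green bits 15:6 -> 19:10 */
	       ((u32)b >> 6);                /* blue  bits 15:6 ->  9:0  */
}

For instance, full-scale white (0xffff per channel) packs to 0x3fffffff; the result lands in the same bit positions the old (red >> 6) << 20 path produced, just without the intermediate per-CRTC copy.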
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -499,79 +499,6 @@ static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev) return true; } -static void dce_v11_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp; - int i; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { -#if 1 - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) { - /*it is correct only for RGB ; black is 0*/ - WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } -#else - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ -#endif - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v11_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp; - int i; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - - if (save->crtc_enabled[i]) { - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start)); - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); -} - static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -1851,7 +1778,7 @@ static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder, dce_v11_0_audio_write_sad_regs(encoder); dce_v11_0_audio_write_latency_fields(encoder, mode); - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -2251,6 +2178,7 @@ static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; u32 tmp; @@ -2282,11 +2210,14 @@ static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); 
WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset); @@ -2575,7 +2506,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2583,7 +2514,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2617,7 +2548,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -2644,15 +2575,6 @@ static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v11_0_crtc_load_lut(crtc); return 0; @@ -2892,14 +2814,12 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic, .prepare = dce_v11_0_crtc_prepare, .commit = dce_v11_0_crtc_commit, - .load_lut = dce_v11_0_crtc_load_lut, .disable = dce_v11_0_crtc_disable, }; static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2917,12 +2837,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - switch (amdgpu_crtc->crtc_id) { case 0: default: @@ -3086,6 +3000,8 @@ static int dce_v11_0_hw_init(void *handle) dce_v11_0_init_golden_registers(adev); + /* disable vga render */ + dce_v11_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_crtc_powergate_init(adev); amdgpu_atombios_encoder_init_dig(adev); @@ -3806,7 +3722,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { - .set_vga_render_state = &dce_v11_0_set_vga_render_state, .bandwidth_update = &dce_v11_0_bandwidth_update, .vblank_get_counter = &dce_v11_0_vblank_get_counter, .vblank_wait = &dce_v11_0_vblank_wait, @@ -3819,8 +3734,6 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos, .add_encoder = &dce_v11_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = 
&dce_v11_0_stop_mc_access, - .resume_mc_access = &dce_v11_0_resume_mc_access, }; static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index d8c9a959493e..a51e35f824a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -42,6 +42,7 @@ #include "dce/dce_6_0_d.h" #include "dce/dce_6_0_sh_mask.h" #include "gca/gfx_7_2_enum.h" +#include "dce_v6_0.h" #include "si_enums.h" static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev); @@ -118,14 +119,27 @@ static const struct { static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev, u32 block_offset, u32 reg) { - DRM_INFO("xxxx: dce_v6_0_audio_endpt_rreg ----no impl!!!!\n"); - return 0; + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); + r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); + spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + + return r; } static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, u32 block_offset, u32 reg, u32 v) { - DRM_INFO("xxxx: dce_v6_0_audio_endpt_wreg ----no impl!!!!\n"); + unsigned long flags; + + spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, + reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); + spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc) @@ -379,117 +393,6 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev) return mmDC_GPIO_HPD_A; } -static u32 evergreen_get_vblank_counter(struct amdgpu_device* adev, int crtc) -{ - if (crtc >= adev->mode_info.num_crtc) - return 0; - else - return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]); -} - -static void dce_v6_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp, frame_count; - int i, j; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - WREG32(mmVGA_RENDER_CONTROL, 0); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK; - if (crtc_enabled) { - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - - if (!(tmp & CRTC_BLANK_CONTROL__CRTC_BLANK_DATA_EN_MASK)) { - dce_v6_0_vblank_wait(adev, i); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp |= CRTC_BLANK_CONTROL__CRTC_BLANK_DATA_EN_MASK; - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - } - /* wait for the next frame */ - frame_count = evergreen_get_vblank_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (evergreen_get_vblank_counter(adev, i) != frame_count) - break; - udelay(1); - } - - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp &= 
~CRTC_CONTROL__CRTC_MASTER_EN_MASK; - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v6_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp; - int i, j; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, (u32)adev->mc.vram_start); - - /* unlock regs and wait for update */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (save->crtc_enabled[i]) { - tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]); - if ((tmp & 0x7) != 0) { - tmp &= ~0x7; - WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (tmp & GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK) { - tmp &= ~GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK; - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]); - if (tmp & 1) { - tmp &= ~1; - WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp); - } - for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if ((tmp & GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK) == 0) - break; - udelay(1); - } - } - } - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); - -} - static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -501,21 +404,16 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) { - int num_crtc = 0; - switch (adev->asic_type) { case CHIP_TAHITI: case CHIP_PITCAIRN: case CHIP_VERDE: - num_crtc = 6; - break; + return 6; case CHIP_OLAND: - num_crtc = 2; - break; + return 2; default: - num_crtc = 0; + return 0; } - return num_crtc; } void dce_v6_0_disable_dce(struct amdgpu_device *adev) @@ -1225,17 +1123,17 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev) dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads); } } -/* + static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev) { int i; - u32 offset, tmp; + u32 tmp; for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - offset = adev->mode_info.audio.pin[i].offset; - tmp = RREG32_AUDIO_ENDPT(offset, - AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); - if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1) + tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); + if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT, + PORT_CONNECTIVITY)) adev->mode_info.audio.pin[i].connected = false; else adev->mode_info.audio.pin[i].connected = true; @@ -1257,45 +1155,206 @@ static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *ade return NULL; } -static void 
dce_v6_0_afmt_audio_select_pin(struct drm_encoder *encoder) +static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) { struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - u32 offset; if (!dig || !dig->afmt || !dig->afmt->pin) return; - offset = dig->afmt->offset; - - WREG32(AFMT_AUDIO_SRC_CONTROL + offset, - AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); - + WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, + REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, + dig->afmt->pin->id)); } static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_audio_write_latency_fields---no imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + int interlace = 0; + u32 tmp; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + interlace = 1; + + if (connector->latency_present[interlace]) { + tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + VIDEO_LIPSYNC, connector->video_latency[interlace]); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + AUDIO_LIPSYNC, connector->audio_latency[interlace]); + } else { + tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + VIDEO_LIPSYNC, 0); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + AUDIO_LIPSYNC, 0); + } + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); } static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_audio_write_speaker_allocation---no imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + u8 *sadb = NULL; + int sad_count; + u32 tmp; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); + if (sad_count < 0) { + DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); + sad_count = 0; + } + + /* program the speaker allocation */ + tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + HDMI_CONNECTION, 0); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + DP_CONNECTION, 0); + + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) + tmp = REG_SET_FIELD(tmp, 
AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + DP_CONNECTION, 1); + else + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + HDMI_CONNECTION, 1); + + if (sad_count) + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + SPEAKER_ALLOCATION, sadb[0]); + else + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + SPEAKER_ALLOCATION, 5); /* stereo */ + + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + + kfree(sadb); } static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_audio_write_sad_regs---no imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + struct cea_sad *sads; + int i, sad_count; + + static const u16 eld_reg_to_type[][2] = { + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, + }; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); + if (sad_count <= 0) { + DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + return; + } + + for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { + u32 tmp = 0; + u8 stereo_freqs = 0; + int max_channels = -1; + int j; + + for (j = 0; j < sad_count; j++) { + struct cea_sad *sad = &sads[j]; + + if (sad->format == eld_reg_to_type[i][1]) { + if (sad->channels > max_channels) { + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + MAX_CHANNELS, sad->channels); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + DESCRIPTOR_BYTE_2, sad->byte2); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + SUPPORTED_FREQUENCIES, sad->freq); + max_channels = sad->channels; + } + + if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) + stereo_freqs |= sad->freq; + else + break; + } + } + + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); + } + + kfree(sads); } -*/ + static void dce_v6_0_audio_enable(struct amdgpu_device *adev, 
struct amdgpu_audio_pin *pin, bool enable) { - DRM_INFO("xxxx: dce_v6_0_audio_enable---no imp!!!!!\n"); + if (!pin) + return; + + WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); } static const u32 pin_offsets[7] = @@ -1311,42 +1370,372 @@ static const u32 pin_offsets[7] = static int dce_v6_0_audio_init(struct amdgpu_device *adev) { + int i; + + if (!amdgpu_audio) + return 0; + + adev->mode_info.audio.enabled = true; + + switch (adev->asic_type) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + default: + adev->mode_info.audio.num_pins = 6; + break; + case CHIP_OLAND: + adev->mode_info.audio.num_pins = 2; + break; + } + + for (i = 0; i < adev->mode_info.audio.num_pins; i++) { + adev->mode_info.audio.pin[i].channels = -1; + adev->mode_info.audio.pin[i].rate = -1; + adev->mode_info.audio.pin[i].bits_per_sample = -1; + adev->mode_info.audio.pin[i].status_bits = 0; + adev->mode_info.audio.pin[i].category_code = 0; + adev->mode_info.audio.pin[i].connected = false; + adev->mode_info.audio.pin[i].offset = pin_offsets[i]; + adev->mode_info.audio.pin[i].id = i; + dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); + } + return 0; } static void dce_v6_0_audio_fini(struct amdgpu_device *adev) { + int i; + + if (!amdgpu_audio) + return; + + if (!adev->mode_info.audio.enabled) + return; + for (i = 0; i < adev->mode_info.audio.num_pins; i++) + dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); + + adev->mode_info.audio.enabled = false; } -/* -static void dce_v6_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock) +static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_afmt_update_ACR---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); + WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); } -*/ -/* - * build a HDMI Video Info Frame - */ -/* -static void dce_v6_0_afmt_update_avi_infoframe(struct drm_encoder *encoder, - void *buffer, size_t size) + +static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder, + uint32_t clock, int bpc) { - DRM_INFO("xxxx: dce_v6_0_afmt_update_avi_infoframe---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock); + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, + bpc > 8 ? 
0 : 1); + WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz); + WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz); + WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz); + WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz); + WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz); + WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz); + WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder, + struct drm_display_mode *mode) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct hdmi_avi_infoframe frame; + u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; + uint8_t *payload = buffer + 3; + uint8_t *header = buffer; + ssize_t err; + u32 tmp; + + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); + if (err < 0) { + DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); + return; + } + + err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); + if (err < 0) { + DRM_ERROR("failed to pack AVI infoframe: %zd\n", err); + return; + } + + WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset, + payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24)); + WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset, + payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24)); + WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset, + payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24)); + WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset, + payload[0xC] | (payload[0xD] << 8) | (header[1] << 24)); + + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); + /* anything other than 0 */ + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, + HDMI_AUDIO_INFO_LINE, 2); + WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); } static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock) { - DRM_INFO("xxxx: dce_v6_0_audio_set_dto---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); + u32 tmp; + + /* + * Two dtos: generally use dto0 for hdmi, dto1 for dp. + * Express [24MHz / target pixel clock] as an exact rational + * number (coefficient of two integer numbers. 
DCCG_AUDIO_DTOx_PHASE + * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator + */ + tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE); + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id); + if (em == ATOM_ENCODER_MODE_HDMI) { + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO_SEL, 0); + } else if (ENCODER_MODE_IS_DP(em)) { + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO_SEL, 1); + } + WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp); + if (em == ATOM_ENCODER_MODE_HDMI) { + WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000); + WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock); + } else if (ENCODER_MODE_IS_DP(em)) { + WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000); + WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock); + } +} + +static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); + WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1); + WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2); + WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8); + WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1); + tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3); + WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); } -*/ -/* - * update the info frames with the data from the current display mode - */ + +static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_GC + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 
1 : 0); + WREG32(mmHDMI_GC + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + if (enable) { + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1); + WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2); + WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + } else { + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0); + WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + } +} + +static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + if (enable) { + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1); + WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp); + + tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); + WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp); + } else { + WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0); + } +} + static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_afmt_setmode ----no impl !!!!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + int 
em = amdgpu_atombios_encoder_get_encoder_mode(encoder); + int bpc = 8; + + if (!dig || !dig->afmt) + return; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + if (!dig->afmt->enabled) + return; + + dig->afmt->pin = dce_v6_0_audio_get_pin(adev); + if (!dig->afmt->pin) + return; + + if (encoder->crtc) { + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); + bpc = amdgpu_crtc->bpc; + } + + /* disable audio before setting up hw */ + dce_v6_0_audio_enable(adev, dig->afmt->pin, false); + + dce_v6_0_audio_set_mute(encoder, true); + dce_v6_0_audio_write_speaker_allocation(encoder); + dce_v6_0_audio_write_sad_regs(encoder); + dce_v6_0_audio_write_latency_fields(encoder, mode); + if (em == ATOM_ENCODER_MODE_HDMI) { + dce_v6_0_audio_set_dto(encoder, mode->clock); + dce_v6_0_audio_set_vbi_packet(encoder); + dce_v6_0_audio_set_acr(encoder, mode->clock, bpc); + } else if (ENCODER_MODE_IS_DP(em)) { + dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10); + } + dce_v6_0_audio_set_packet(encoder); + dce_v6_0_audio_select_pin(encoder); + dce_v6_0_audio_set_avi_infoframe(encoder, mode); + dce_v6_0_audio_set_mute(encoder, false); + if (em == ATOM_ENCODER_MODE_HDMI) { + dce_v6_0_audio_hdmi_enable(encoder, 1); + } else if (ENCODER_MODE_IS_DP(em)) { + dce_v6_0_audio_dp_enable(encoder, 1); + } + + /* enable audio after setting up hw */ + dce_v6_0_audio_enable(adev, dig->afmt->pin, true); } static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) @@ -1362,6 +1751,7 @@ static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) /* Silent, r600_hdmi_enable will raise WARN for us */ if (enable && dig->afmt->enabled) return; + if (!enable && !dig->afmt->enabled) return; @@ -1682,6 +2072,7 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); @@ -1711,11 +2102,14 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, @@ -1928,7 +2322,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -1936,7 +2330,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -1970,7 +2364,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + 
drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -1996,15 +2390,6 @@ static int dce_v6_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v6_0_crtc_load_lut(crtc); return 0; @@ -2212,14 +2597,12 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v6_0_crtc_set_base_atomic, .prepare = dce_v6_0_crtc_prepare, .commit = dce_v6_0_crtc_commit, - .load_lut = dce_v6_0_crtc_load_lut, .disable = dce_v6_0_crtc_disable, }; static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2237,12 +2620,6 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id]; amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; @@ -2373,6 +2750,8 @@ static int dce_v6_0_hw_init(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* disable vga render */ + dce_v6_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); @@ -2756,6 +3135,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); amdgpu_encoder->pixel_clock = adjusted_mode->clock; @@ -2765,7 +3145,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, /* set scaler clears this on some chips */ dce_v6_0_set_interleave(encoder->crtc, mode); - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) { + if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) { dce_v6_0_afmt_enable(encoder, true); dce_v6_0_afmt_setmode(encoder, adjusted_mode); } @@ -2827,11 +3207,12 @@ static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig; + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); if (amdgpu_atombios_encoder_is_digital(encoder)) { - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) + if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) dce_v6_0_afmt_enable(encoder, false); dig = amdgpu_encoder->enc_priv; dig->dig_encoder = -1; @@ -3023,7 +3404,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { - .set_vga_render_state = &dce_v6_0_set_vga_render_state, .bandwidth_update = &dce_v6_0_bandwidth_update, .vblank_get_counter = &dce_v6_0_vblank_get_counter, .vblank_wait = &dce_v6_0_vblank_wait, @@ -3036,8 
+3416,6 @@ static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v6_0_crtc_get_scanoutpos, .add_encoder = &dce_v6_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = &dce_v6_0_stop_mc_access, - .resume_mc_access = &dce_v6_0_resume_mc_access, }; static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index db30c6ba563a..9cf14b8b2db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -419,81 +419,6 @@ static bool dce_v8_0_is_display_hung(struct amdgpu_device *adev) return true; } -static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp; - int i; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { -#if 1 - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) { - /*it is correct only for RGB ; black is 0*/ - WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } - mdelay(20); -#else - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ -#endif - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp; - int i; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - - if (save->crtc_enabled[i]) { - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } - mdelay(20); - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start)); - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); -} - static void dce_v8_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -1750,7 +1675,7 @@ static void dce_v8_0_afmt_setmode(struct drm_encoder *encoder, dce_v8_0_audio_write_sad_regs(encoder); 
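A brief API note for the *_afmt_setmode() hunks (this dce_v8_0.c one and the DCE 6/10/11 ones above): drm_hdmi_avi_infoframe_from_display_mode() is now called with a third boolean, false everywhere here; reading it as a "sink supports HDMI 2.0" hint is an assumption, not something the patch states. The build-and-pack sequence these hunks depend on looks roughly like this sketch (helper name hypothetical):

/* Sketch of the AVI infoframe build/pack path used by the afmt_setmode()
 * hunks and by dce_v6_0_audio_set_avi_infoframe() above. The bool argument
 * is assumed to be an HDMI 2.0 sink hint (false on these parts).
 */
static int build_avi_infoframe(struct drm_display_mode *mode, u8 *buffer,
			       size_t size)
{
	struct hdmi_avi_infoframe frame;
	ssize_t err;

	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);
	if (err < 0)
		return err;		/* mode cannot be described */

	/* serialize header + payload; size should cover at least
	 * HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE bytes */
	err = hdmi_avi_infoframe_pack(&frame, buffer, size);
	return err < 0 ? err : 0;
}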
dce_v8_0_audio_write_latency_fields(encoder, mode); - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -2124,6 +2049,7 @@ static void dce_v8_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); @@ -2153,11 +2079,14 @@ static void dce_v8_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, @@ -2406,7 +2335,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2414,7 +2343,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2448,7 +2377,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -2475,15 +2404,6 @@ static int dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v8_0_crtc_load_lut(crtc); return 0; @@ -2702,14 +2622,12 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v8_0_crtc_set_base_atomic, .prepare = dce_v8_0_crtc_prepare, .commit = dce_v8_0_crtc_commit, - .load_lut = dce_v8_0_crtc_load_lut, .disable = dce_v8_0_crtc_disable, }; static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2727,12 +2645,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id]; amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; @@ -2870,6 +2782,8 @@ static int dce_v8_0_hw_init(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* disable vga render 
*/ + dce_v8_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); @@ -3574,7 +3488,6 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { - .set_vga_render_state = &dce_v8_0_set_vga_render_state, .bandwidth_update = &dce_v8_0_bandwidth_update, .vblank_get_counter = &dce_v8_0_vblank_get_counter, .vblank_wait = &dce_v8_0_vblank_wait, @@ -3587,8 +3500,6 @@ static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v8_0_crtc_get_scanoutpos, .add_encoder = &dce_v8_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = &dce_v8_0_stop_mc_access, - .resume_mc_access = &dce_v8_0_resume_mc_access, }; static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index f1b479b6ac98..b9ee9073cb0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" @@ -95,62 +95,6 @@ static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev) return 0; } -static void dce_virtual_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - dce_v6_0_disable_dce(adev); - break; -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: - dce_v8_0_disable_dce(adev); - break; -#endif - case CHIP_FIJI: - case CHIP_TONGA: - dce_v10_0_disable_dce(adev); - break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - dce_v11_0_disable_dce(adev); - break; - case CHIP_TOPAZ: -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_HAINAN: -#endif - /* no DCE */ - return; - default: - DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); - } - - return; -} -static void dce_virtual_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - return; -} - -static void dce_virtual_set_vga_render_state(struct amdgpu_device *adev, - bool render) -{ - return; -} - /** * dce_virtual_bandwidth_update - program display watermarks * @@ -168,16 +112,6 @@ static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } - return 0; } @@ -289,11 +223,6 @@ static int dce_virtual_crtc_set_base(struct drm_crtc *crtc, int x, int y, return 0; } -static void dce_virtual_crtc_load_lut(struct drm_crtc *crtc) -{ - return; -} - static int dce_virtual_crtc_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, enum mode_set_atomic state) @@ -309,14 +238,12 @@ static const struct drm_crtc_helper_funcs dce_virtual_crtc_helper_funcs = { .mode_set_base_atomic = 
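
dce_virtual.c gets the same treatment in the hunks around this point: the stop_mc_access/resume_mc_access and set_vga_render_state stubs and the per-CRTC LUT copies are removed, and the one piece of real work, turning off the physical DCE block when the virtual display is in use, moves into dce_virtual_hw_init(). The following is a condensed, reflowed restatement of the new hw_init body from the '+' lines just below (note that, at least in this hunk, CHIP_POLARIS12 from the old stop_mc_access list is not carried over):

static int dce_virtual_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
		dce_v6_0_disable_dce(adev);	/* SI parts */
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		dce_v8_0_disable_dce(adev);	/* CIK parts */
		break;
#endif
	case CHIP_FIJI:
	case CHIP_TONGA:
		dce_v10_0_disable_dce(adev);
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_POLARIS11:
	case CHIP_POLARIS10:
		dce_v11_0_disable_dce(adev);
		break;
	case CHIP_TOPAZ:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
		/* no DCE */
		break;
	case CHIP_VEGA10:
		/* nothing to do */
		break;
	default:
		DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n",
			  adev->asic_type);
	}
	return 0;
}
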
dce_virtual_crtc_set_base_atomic, .prepare = dce_virtual_crtc_prepare, .commit = dce_virtual_crtc_commit, - .load_lut = dce_virtual_crtc_load_lut, .disable = dce_virtual_crtc_disable, }; static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -329,12 +256,6 @@ static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->crtc_id = index; adev->mode_info.crtcs[index] = amdgpu_crtc; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; @@ -522,6 +443,47 @@ static int dce_virtual_sw_fini(void *handle) static int dce_virtual_hw_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + dce_v6_0_disable_dce(adev); + break; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + dce_v8_0_disable_dce(adev); + break; +#endif + case CHIP_FIJI: + case CHIP_TONGA: + dce_v10_0_disable_dce(adev); + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_POLARIS11: + case CHIP_POLARIS10: + dce_v11_0_disable_dce(adev); + break; + case CHIP_TOPAZ: +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + /* no DCE */ + break; + case CHIP_VEGA10: + break; + default: + DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); + } return 0; } @@ -677,7 +639,6 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_virtual_display_funcs = { - .set_vga_render_state = &dce_virtual_set_vga_render_state, .bandwidth_update = &dce_virtual_bandwidth_update, .vblank_get_counter = &dce_virtual_vblank_get_counter, .vblank_wait = &dce_virtual_vblank_wait, @@ -690,8 +651,6 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = { .page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos, .add_encoder = NULL, .add_connector = NULL, - .stop_mc_access = &dce_virtual_stop_mc_access, - .resume_mc_access = &dce_virtual_resume_mc_access, }; static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) @@ -809,7 +768,7 @@ static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = { static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) { - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST; + adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1; adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index a125f9d44577..dbbe986f90f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -393,8 +393,11 @@ out: static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) { - const u32 num_tile_mode_states = 32; - u32 reg_offset, gb_tile_moden, split_equal_to_row_size; + const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); + u32 reg_offset, split_equal_to_row_size, *tilemode; + + memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array)); + tilemode = 
adev->gfx.config.tile_mode_array; switch (adev->gfx.config.mem_row_size_in_kb) { case 1: @@ -410,887 +413,867 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VERDE) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 
- NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - } else if (adev->asic_type == CHIP_OLAND || - adev->asic_type == CHIP_HAINAN) { - 
for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + 
BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); + } else if (adev->asic_type == CHIP_OLAND) { + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + 
BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); + tilemode[8] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[15] = 
MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + NUM_BANKS(ADDR_SURF_8_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); + } else if (adev->asic_type == CHIP_HAINAN) { + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + 
NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + 
ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | 
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - 
PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - 
NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 27: - gb_tile_moden = 
(MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - } else{ - + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + 
PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + 
ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); + } else { DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); } - } static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, @@ -1318,11 +1301,6 @@ static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v6_0_create_bitmask(u32 bit_width) -{ - return (u32)(((u64)1 << bit_width) - 1); -} - static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -1332,8 +1310,8 @@ static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); - mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_backends_per_se/ - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se/ + adev->gfx.config.max_sh_per_se); return ~data & mask; } @@ -1399,11 +1377,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK; - if (!se_mask[idx]) { + if (!se_mask[idx]) raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; - } } pkr0_mask &= rb_mask; @@ -1411,11 +1388,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK; - if (!pkr0_mask) { + if (!pkr0_mask) raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; - } } if (rb_per_se >= 2) { @@ -1427,13 +1403,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (!rb0_mask || !rb1_mask) { raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK; - if (!rb0_mask) { + if (!rb0_mask) raster_config_se |= RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; - } } if (rb_per_se > 2) { @@ -1444,13 +1419,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (!rb0_mask || !rb1_mask) { raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK; - if (!rb0_mask) { + if (!rb0_mask) raster_config_se |= RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; - } } } } @@ -1479,8 +1453,9 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); data = gfx_v6_0_get_rb_active_bitmap(adev); - active_rbs |= data << ((i * 
adev->gfx.config.max_sh_per_se + j) * - rb_bitmap_width_per_sh); + active_rbs |= data << + ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); } } gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -1494,13 +1469,12 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) gfx_v6_0_raster_config(adev, &raster_config); if (!adev->gfx.config.backend_enable_mask || - adev->gfx.config.num_rbs >= num_rb_pipes) { + adev->gfx.config.num_rbs >= num_rb_pipes) WREG32(mmPA_SC_RASTER_CONFIG, raster_config); - } else { + else gfx_v6_0_write_harvested_raster_configs(adev, raster_config, adev->gfx.config.backend_enable_mask, num_rb_pipes); - } /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -1517,11 +1491,6 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } -/* -static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev) -{ -} -*/ static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, u32 bitmap) @@ -1544,7 +1513,7 @@ static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev) data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; } @@ -1688,7 +1657,8 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); - mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); + adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); + mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; @@ -1790,7 +1760,7 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) static void gfx_v6_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = mmSCRATCH_REG0; adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -2434,40 +2404,9 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) { - int r; - - if (adev->gfx.rlc.save_restore_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.save_restore_obj); - adev->gfx.rlc.save_restore_obj = NULL; - } - - if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - adev->gfx.rlc.clear_state_obj = NULL; - } - - if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - - 
amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); - adev->gfx.rlc.cp_table_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); } static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) @@ -2490,43 +2429,23 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) if (src_ptr) { /* save restore block */ - if (adev->gfx.rlc.save_restore_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, NULL, - &adev->gfx.rlc.save_restore_obj); - - if (r) { - dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); - if (unlikely(r != 0)) { - gfx_v6_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.save_restore_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - dev_warn(adev->dev, "(%d) pin RLC sr bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", + r); gfx_v6_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.save_restore_obj, (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC sr bo failed\n", r); - gfx_v6_0_rlc_fini(adev); - return r; - } /* write the sr buffer */ dst_ptr = adev->gfx.rlc.sr_ptr; for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) dst_ptr[i] = cpu_to_le32(src_ptr[i]); + amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); } @@ -2536,39 +2455,17 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) adev->gfx.rlc.clear_state_size = gfx_v6_0_get_csb_size(adev); dws = adev->gfx.rlc.clear_state_size + (256 / 4); - if (adev->gfx.rlc.clear_state_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, NULL, - &adev->gfx.rlc.clear_state_obj); - - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v6_0_rlc_fini(adev); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - gfx_v6_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); gfx_v6_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); - gfx_v6_0_rlc_fini(adev); - return r; - } /* set up the cs buffer */ dst_ptr = adev->gfx.rlc.cs_ptr; reg_list_mc_addr = adev->gfx.rlc.clear_state_gpu_addr + 256; @@ -3719,6 +3616,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info 
*cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; + + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; memset(cu_info, 0, sizeof(*cu_info)); @@ -3737,16 +3640,18 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v6_0_get_cu_enabled(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } mask <<= 1; } active_cu_number += counter; - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index ee2f2139e2eb..00868764a0dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -21,12 +21,13 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "amdgpu_gfx.h" #include "cikd.h" #include "cik.h" +#include "cik_structs.h" #include "atom.h" #include "amdgpu_ucode.h" #include "clearstate_ci.h" @@ -48,7 +49,7 @@ #include "oss/oss_2_0_sh_mask.h" #define GFX7_NUM_GFX_RINGS 1 -#define GFX7_NUM_COMPUTE_RINGS 8 +#define GFX7_MEC_HPD_SIZE 2048 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1607,19 +1608,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, } /** - * gfx_v7_0_create_bitmask - create a bitmask - * - * @bit_width: length of the mask - * - * create a variable length bit mask (CIK). - * Returns the bitmask. 
- */ -static u32 gfx_v7_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - -/** * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs * * @adev: amdgpu_device pointer @@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -1835,9 +1823,9 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) } /** - * gmc_v7_0_init_compute_vmid - gart enable + * gfx_v7_0_init_compute_vmid - gart enable * - * @rdev: amdgpu_device pointer + * @adev: amdgpu_device pointer * * Initialize compute vmid sh_mem registers * @@ -1845,7 +1833,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) #define DEFAULT_SH_MEM_BASES (0x6000) #define FIRST_COMPUTE_VMID (8) #define LAST_COMPUTE_VMID (16) -static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) +static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; uint32_t sh_mem_config; @@ -1933,6 +1921,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) ELEMENT_SIZE, 1); sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, INDEX_STRIDE, 3); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { @@ -1946,12 +1935,11 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); WREG32(mmSH_MEM_BASES, sh_mem_base); - WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - gmc_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_compute_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); @@ -2033,7 +2021,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) */ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = mmSCRATCH_REG0; adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -2786,91 +2774,48 @@ static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev) */ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev) { - int i, r; + int i; for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - } + amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); } } static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) { - int r; - - if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); - - amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); - adev->gfx.mec.hpd_eop_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); } -#define MEC_HPD_SIZE 2048 - static int 
gfx_v7_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total - * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total - * Nonetheless, we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; - - if (adev->gfx.mec.hpd_eop_obj == NULL) { - r = amdgpu_bo_create(adev, - adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.hpd_eop_obj); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - return r; - } - } + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.hpd_eop_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); - gfx_v7_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + + /* allocate space for ALL pipes (even the ones we don't own) */ + mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec + * GFX7_MEC_HPD_SIZE * 2; + + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); if (r) { - dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); + dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r); gfx_v7_0_mec_fini(adev); return r; } /* clear memory. Not sure if this is required or not */ - memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); + memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -2917,275 +2862,274 @@ struct hqd_registers u32 cp_mqd_control; }; -struct bonaire_mqd -{ - u32 header; - u32 dispatch_initiator; - u32 dimensions[3]; - u32 start_idx[3]; - u32 num_threads[3]; - u32 pipeline_stat_enable; - u32 perf_counter_enable; - u32 pgm[2]; - u32 tba[2]; - u32 tma[2]; - u32 pgm_rsrc[2]; - u32 vmid; - u32 resource_limits; - u32 static_thread_mgmt01[2]; - u32 tmp_ring_size; - u32 static_thread_mgmt23[2]; - u32 restart[3]; - u32 thread_trace_enable; - u32 reserved1; - u32 user_data[16]; - u32 vgtcs_invoke_count[2]; - struct hqd_registers queue_state; - u32 dequeue_cntr; - u32 interrupt_queue[64]; -}; - -/** - * gfx_v7_0_cp_compute_resume - setup the compute queue registers - * - * @adev: amdgpu_device pointer - * - * Program the compute queues and test them to make sure they - * are working. - * Returns 0 for success, error for failure. 
- */ -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, + int mec, int pipe) { - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct bonaire_mqd *mqd; - struct amdgpu_ring *ring; - - /* fix up chicken bits */ - tmp = RREG32(mmCP_CPF_DEBUG); - tmp |= (1 << 23); - WREG32(mmCP_CPF_DEBUG, tmp); + u32 tmp; + size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe) + * GFX7_MEC_HPD_SIZE * 2; - /* init the pipes */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { - int me = (i < 4) ? 1 : 2; - int pipe = (i < 4) ? i : (i - 4); + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset; - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); + cik_srbm_select(adev, mec + 1, pipe, 0, 0); - cik_srbm_select(adev, me, pipe, 0, 0); + /* write the EOP addr */ + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); - /* write the EOP addr */ - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); + /* set the VMID assigned */ + WREG32(mmCP_HPD_EOP_VMID, 0); - /* set the VMID assigned */ - WREG32(mmCP_HPD_EOP_VMID, 0); + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(mmCP_HPD_EOP_CONTROL); + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8); + WREG32(mmCP_HPD_EOP_CONTROL, tmp); - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(mmCP_HPD_EOP_CONTROL); - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; - tmp |= order_base_2(MEC_HPD_SIZE / 8); - WREG32(mmCP_HPD_EOP_CONTROL, tmp); - } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); +} - /* init the queues. Just two for now. 
*/ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev) +{ + int i; - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct bonaire_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } + /* disable the queue if it's active */ + if (RREG32(mmCP_HQD_ACTIVE) & 1) { + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } + if (i == adev->usec_timeout) + return -ETIMEDOUT; - /* init the mqd struct */ - memset(buf, 0, sizeof(struct bonaire_mqd)); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); + } - mqd = (struct bonaire_mqd *)buf; - mqd->header = 0xC0310800; - mqd->static_thread_mgmt01[0] = 0xffffffff; - mqd->static_thread_mgmt01[1] = 0xffffffff; - mqd->static_thread_mgmt23[0] = 0xffffffff; - mqd->static_thread_mgmt23[1] = 0xffffffff; + return 0; +} - mutex_lock(&adev->srbm_mutex); - cik_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, + struct cik_mqd *mqd, + uint64_t mqd_gpu_addr, + struct amdgpu_ring *ring) +{ + u64 hqd_gpu_addr; + u64 wb_gpu_addr; - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK; - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + /* init the mqd struct */ + memset(mqd, 0, sizeof(struct cik_mqd)); - /* enable doorbell? 
*/ - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - else - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); - - /* disable the queue if it's active */ - mqd->queue_state.cp_hqd_dequeue_request = 0; - mqd->queue_state.cp_hqd_pq_rptr = 0; - mqd->queue_state.cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - } + mqd->header = 0xC0310800; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - /* set the pointer to the MQD */ - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); - /* set MQD vmid to 0 */ - mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); - - mqd->queue_state.cp_hqd_pq_control |= - order_base_2(ring->ring_size / 8); - mqd->queue_state.cp_hqd_pq_control |= - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); + /* enable doorbell? 
*/ + mqd->cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + if (ring->use_doorbell) + mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + else + mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + + /* set MQD vmid to 0 */ + mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL); + mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); + mqd->cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); + + mqd->cp_hqd_pq_control |= + order_base_2(ring->ring_size / 8); + mqd->cp_hqd_pq_control |= + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); #ifdef __BIG_ENDIAN - mqd->queue_state.cp_hqd_pq_control |= - 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; + mqd->cp_hqd_pq_control |= + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; #endif - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | + mqd->cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); - mqd->queue_state.cp_hqd_pq_control |= - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->queue_state.cp_hqd_pq_rptr_report_addr); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control |= - (ring->doorbell_index << - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); - mqd->queue_state.cp_hqd_pq_doorbell_control |= - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); + mqd->cp_hqd_pq_control |= + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - } else { - mqd->queue_state.cp_hqd_pq_doorbell_control = 0; - } - 
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + /* set the wb address wether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + mqd->cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + mqd->cp_hqd_pq_doorbell_control &= + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; + mqd->cp_hqd_pq_doorbell_control |= + (ring->doorbell_index << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); + mqd->cp_hqd_pq_doorbell_control |= + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + mqd->cp_hqd_pq_doorbell_control &= + ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); - /* set the vmid for the queue */ - mqd->queue_state.cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); + } else { + mqd->cp_hqd_pq_doorbell_control = 0; + } - /* activate the queue */ - mqd->queue_state.cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + /* defaults */ + mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL); + mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR); + mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI); + mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR); + mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE); + mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD); + mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE); + mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO); + mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI); + mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO); + mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); + mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR); + + /* activate the queue */ + mqd->cp_hqd_active = 1; +} + +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) +{ + uint32_t tmp; + uint32_t mqd_reg; + uint32_t *mqd_data; - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */ + mqd_data = &mqd->cp_mqd_base_addr_lo; - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); + /* disable wptr polling */ + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 
0); + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); - ring->ready = true; + /* program all HQD registers */ + for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); + + /* activate the HQD */ + for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); + + return 0; +} + +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) +{ + int r; + u64 mqd_gpu_addr; + struct cik_mqd *mqd; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &mqd_gpu_addr, (void **)&mqd); + if (r) { + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); + return r; + } + + mutex_lock(&adev->srbm_mutex); + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); + gfx_v7_0_mqd_deactivate(adev); + gfx_v7_0_mqd_commit(adev, mqd); + + cik_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + return 0; +} + +/** + * gfx_v7_0_cp_compute_resume - setup the compute queue registers + * + * @adev: amdgpu_device pointer + * + * Program the compute queues and test them to make sure they + * are working. + * Returns 0 for success, error for failure. + */ +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) +{ + int r, i, j; + u32 tmp; + struct amdgpu_ring *ring; + + /* fix up chicken bits */ + tmp = RREG32(mmCP_CPF_DEBUG); + tmp |= (1 << 23); + WREG32(mmCP_CPF_DEBUG, tmp); + + /* init all pipes (even the ones we don't own) */ + for (i = 0; i < adev->gfx.mec.num_mec; i++) + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) + gfx_v7_0_compute_pipe_init(adev, i, j); + + /* init the queues */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + r = gfx_v7_0_compute_queue_init(adev, i); + if (r) { + gfx_v7_0_cp_compute_fini(adev); + return r; + } } gfx_v7_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - + ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; @@ -3355,43 +3299,9 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, */ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) { - int r; - - /* save restore block */ - if (adev->gfx.rlc.save_restore_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.save_restore_obj); - adev->gfx.rlc.save_restore_obj = NULL; - } - - /* clear state block */ - if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - adev->gfx.rlc.clear_state_obj = NULL; - } - - /* clear state block */ - if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - 
amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); - adev->gfx.rlc.cp_table_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); } static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) @@ -3426,39 +3336,17 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) if (src_ptr) { /* save restore block */ - if (adev->gfx.rlc.save_restore_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.save_restore_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.save_restore_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - dev_warn(adev->dev, "(%d) pin RLC sr bo failed\n", r); + dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); gfx_v7_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.save_restore_obj, (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC sr bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } /* write the sr buffer */ dst_ptr = adev->gfx.rlc.sr_ptr; for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) @@ -3471,39 +3359,17 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) /* clear state block */ adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); - if (adev->gfx.rlc.clear_state_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.clear_state_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); gfx_v7_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } /* set up the cs buffer */ dst_ptr = adev->gfx.rlc.cs_ptr; gfx_v7_0_get_csb_buffer(adev, dst_ptr); @@ -3512,37 +3378,14 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) } if (adev->gfx.rlc.cp_table_size) { - if (adev->gfx.rlc.cp_table_obj == 
NULL) { - r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.cp_table_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); - if (unlikely(r != 0)) { - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_gpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr); + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); if (r) { - dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); gfx_v7_0_rlc_fini(adev); return r; } @@ -3797,6 +3640,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) gfx_v7_0_update_rlc(adev, tmp); data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (orig != data) + WREG32(mmRLC_CGCG_CGLS_CTRL, data); + } else { gfx_v7_0_enable_gui_idle_interrupt(adev, false); @@ -3806,11 +3652,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) RREG32(mmCB_CGTT_SCLK_CTRL); data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); - } - - if (orig != data) - WREG32(mmRLC_CGCG_CGLS_CTRL, data); + if (orig != data) + WREG32(mmRLC_CGCG_CGLS_CTRL, data); + gfx_v7_0_enable_gui_idle_interrupt(adev, true); + } } static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) @@ -4089,7 +3935,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; - mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return (~data) & mask; } @@ -4470,7 +4316,7 @@ static int gfx_v7_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; adev->gfx.funcs = &gfx_v7_0_gfx_funcs; adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; gfx_v7_0_set_ring_funcs(adev); @@ -4662,11 +4508,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config = gb_addr_config; } +static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * 
adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, r; + int i, j, k, r, ring_id; + + switch (adev->asic_type) { + case CHIP_KAVERI: + adev->gfx.mec.num_mec = 2; + break; + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + default: + adev->gfx.mec.num_mec = 1; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; /* EOP Event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); @@ -4716,29 +4608,23 @@ static int gfx_v7_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v7_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); - if (r) - return r; } /* reserve GDS, GWS and OA resource for gfx */ @@ -4969,8 +4855,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, u32 mec_int_cntl, mec_int_cntl_reg; /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. 
*/ @@ -4979,6 +4865,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, case 0: mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; break; + case 1: + mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; + break; + case 2: + mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; + break; + case 3: + mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; + break; default: DRM_DEBUG("invalid pipe %d\n", pipe); return; @@ -5336,6 +5231,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; + + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; memset(cu_info, 0, sizeof(*cu_info)); @@ -5354,16 +5255,18 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v7_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } mask <<= 1; } active_cu_number += counter; - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index 2f5164cc0e53..6fb9c1524691 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; +struct amdgpu_device; +struct cik_mqd; + +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 758d636a6f52..fc260c13b1da 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "vi.h" @@ -40,7 +40,6 @@ #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" - #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" @@ -52,7 +51,7 @@ #include "smu/smu_7_1_3_d.h" #define GFX8_NUM_GFX_RINGS 1 -#define GFX8_NUM_COMPUTE_RINGS 8 +#define GFX8_MEC_HPD_SIZE 2048 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 @@ -194,8 +193,8 @@ static const u32 tonga_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF }; static const u32 tonga_mgcg_cgcg_init[] = @@ -304,8 +303,8 @@ static const u32 polaris11_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 
0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, }; static const u32 golden_settings_polaris10_a11[] = @@ -337,8 +336,8 @@ static const u32 polaris10_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, }; static const u32 fiji_golden_common_all[] = @@ -349,8 +348,8 @@ static const u32 fiji_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, }; @@ -437,8 +436,8 @@ static const u32 iceland_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF }; static const u32 iceland_mgcg_cgcg_init[] = @@ -533,8 +532,8 @@ static const u32 cz_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF }; static const u32 cz_mgcg_cgcg_init[] = @@ -638,8 +637,8 @@ static const u32 stoney_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, }; static const u32 stoney_mgcg_cgcg_init[] = @@ -657,10 +656,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); -static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev); -static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev); +static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); +static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -753,7 +750,7 @@ static void gfx_v8_0_init_golden_registers(struct 
amdgpu_device *adev) static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = mmSCRATCH_REG0; adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -859,7 +856,8 @@ err1: } -static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { +static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) +{ release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; release_firmware(adev->gfx.me_fw); @@ -941,12 +939,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - /* chain ib ucode isn't formal released, just disable it by far - * TODO: when ucod ready we should use ucode version to judge if - * chain-ib support or not. - */ - adev->virt.chained_ib_support = false; - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -960,6 +952,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + /* + * Support for MCBP/Virtualization in combination with chained IBs is + * formal released on feature version #46 + */ + if (adev->gfx.ce_feature_version >= 46 && + adev->gfx.pfp_feature_version >= 46) { + adev->virt.chained_ib_support = true; + DRM_INFO("Chained IB support enabled!\n"); + } else + adev->virt.chained_ib_support = false; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) @@ -1235,29 +1238,8 @@ static void cz_init_cp_jump_table(struct amdgpu_device *adev) static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) { - int r; - - /* clear state block */ - if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - adev->gfx.rlc.clear_state_obj = NULL; - } - - /* jump table block */ - if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); - adev->gfx.rlc.cp_table_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); } static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) @@ -1275,39 +1257,17 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) /* clear state block */ adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); - if (adev->gfx.rlc.clear_state_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.clear_state_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v8_0_rlc_fini(adev); - return r; - } - } - r = 
amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); gfx_v8_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r); - gfx_v8_0_rlc_fini(adev); - return r; - } /* set up the cs buffer */ dst_ptr = adev->gfx.rlc.cs_ptr; gfx_v8_0_get_csb_buffer(adev, dst_ptr); @@ -1318,34 +1278,13 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) if ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY)) { adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ - if (adev->gfx.rlc.cp_table_obj == NULL) { - r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.cp_table_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); - if (unlikely(r != 0)) { - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_gpu_addr); + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); return r; } @@ -1360,141 +1299,36 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { - int r; - - if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); - adev->gfx.mec.hpd_eop_obj = NULL; - } -} - -static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - int r = 0; - - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); - if (r) - return r; - - ring->adev = NULL; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; - } - - ring->queue = 0; - ring->eop_gpu_addr = kiq->eop_gpu_addr; - sprintf(ring->name, "kiq %d.%d.%d", ring->me, 
ring->pipe, ring->queue); - r = amdgpu_ring_init(adev, ring, 1024, - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); - if (r) - dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); - - return r; -} -static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); - amdgpu_ring_fini(ring); + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); } -#define MEC_HPD_SIZE 2048 - static int gfx_v8_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; - - if (adev->gfx.mec.hpd_eop_obj == NULL) { - r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.hpd_eop_obj); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.hpd_eop_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); - gfx_v8_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); - gfx_v8_0_mec_fini(adev); - return r; - } - - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); - - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); - - return 0; -} - -static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); - amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); -} + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); -static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) -{ - int r; - u32 *hpd; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, - &kiq->eop_gpu_addr, (void **)&hpd); + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); if (r) { - dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r; } - memset(hpd, 0, MEC_HPD_SIZE); + memset(hpd, 0, mec_hpd_size); - r = amdgpu_bo_reserve(kiq->eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); - amdgpu_bo_kunmap(kiq->eop_obj); - amdgpu_bo_unreserve(kiq->eop_obj); + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); return 0; } @@ -1907,46 +1741,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 2; - - switch (adev->pdev->revision) { - case 0xc4: - case 0x84: - case 0xc8: - case 0xcc: - case 0xe1: - case 0xe3: - /* B10 */ - adev->gfx.config.max_cu_per_sh = 8; - break; - case 0xc5: - case 0x81: - 
case 0x85: - case 0xc9: - case 0xcd: - case 0xe2: - case 0xe4: - /* B8 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc6: - case 0xca: - case 0xce: - case 0x88: - case 0xe6: - /* B6 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc7: - case 0x87: - case 0xcb: - case 0xe5: - case 0x89: - default: - /* B4 */ - adev->gfx.config.max_cu_per_sh = 4; - break; - } - + adev->gfx.config.max_cu_per_sh = 8; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 32; @@ -1963,35 +1758,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 1; - - switch (adev->pdev->revision) { - case 0x80: - case 0x81: - case 0xc0: - case 0xc1: - case 0xc2: - case 0xc4: - case 0xc8: - case 0xc9: - case 0xd6: - case 0xda: - case 0xe9: - case 0xea: - adev->gfx.config.max_cu_per_sh = 3; - break; - case 0x83: - case 0xd0: - case 0xd1: - case 0xd2: - case 0xd4: - case 0xdb: - case 0xe1: - case 0xe2: - default: - adev->gfx.config.max_cu_per_sh = 2; - break; - } - + adev->gfx.config.max_cu_per_sh = 3; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 16; @@ -2083,13 +1850,67 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) return 0; } +static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX8_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v8_0_sw_init(void *handle) { - int i, r; + int i, j, k, r, ring_id; struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_POLARIS10: + case CHIP_CARRIZO: + adev->gfx.mec.num_mec = 2; + break; + case CHIP_TOPAZ: + case CHIP_STONEY: + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + /* KIQ event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); if (r) @@ -2151,49 +1972,41 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < 
adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v8_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - irq_type); - if (r) - return r; } - if (amdgpu_sriov_vf(adev)) { - r = gfx_v8_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } + r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } - kiq = &adev->gfx.kiq; - r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); - if (r) - return r; + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = gfx_v8_0_compute_mqd_sw_init(adev); - if (r) - return r; - } + /* create MQD for all compute queues as well as KIQ for SRIOV case */ + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); + if (r) + return r; /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, @@ -2237,11 +2050,9 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - if (amdgpu_sriov_vf(adev)) { - gfx_v8_0_compute_mqd_sw_fini(adev); - gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); - gfx_v8_0_kiq_fini(adev); - } + amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); @@ -3594,11 +3405,6 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v8_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -3608,8 +3414,8 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); - mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -3823,7 +3629,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) /** * gfx_v8_0_init_compute_vmid - gart enable * - * @rdev: amdgpu_device pointer + * @adev: amdgpu_device pointer * * Initialize compute vmid sh_mem registers * @@ -3901,6 +3707,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) ELEMENT_SIZE, 1); sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, INDEX_STRIDE, 3); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); + mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { vi_srbm_select(adev, 0, 0, 0, i); @@ -3924,7 
+3732,6 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); - WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -4481,6 +4288,39 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) return 0; } +static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + u32 tmp; + /* no gfx doorbells on iceland */ + if (adev->asic_type == CHIP_TOPAZ) + return; + + tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); + } + + WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); + + if (adev->flags & AMD_IS_APU) + return; + + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, + AMDGPU_DOORBELL_GFX_RING0); + WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); +} static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) { @@ -4528,34 +4368,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32(mmCP_RB0_BASE, rb_addr); WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); - /* no gfx doorbells on iceland */ - if (adev->asic_type != CHIP_TOPAZ) { - tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 0); - } - WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); - - if (adev->asic_type == CHIP_TONGA) { - tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, - DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_GFX_RING0); - WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - - WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, - CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); - } - - } - + gfx_v8_0_set_cpg_door_bell(adev, ring); /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); @@ -4628,29 +4441,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) return 0; } -static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - ring->mqd_ptr = NULL; - ring->mqd_gpu_addr = 0; - } - } -} - /* KIQ functions */ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) { @@ -4666,45 +4456,111 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) WREG32(mmRLC_CP_SCHEDULERS, tmp); } -static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) +static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) { - amdgpu_ring_alloc(ring, 8); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + uint64_t queue_mask = 
0; + int r, i; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i >= (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } /* set resources */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ - amdgpu_ring_write(ring, 0); /* queue mask hi */ - amdgpu_ring_write(ring, 0); /* gws mask lo */ - amdgpu_ring_write(ring, 0); /* gws mask hi */ - amdgpu_ring_write(ring, 0); /* oac mask */ - amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ - amdgpu_ring_commit(ring); - udelay(50); + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + + /* map queues */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + } + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); + amdgpu_ring_commit(kiq_ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; } -static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, - struct amdgpu_ring *ring) +static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) { - struct amdgpu_device *adev = kiq_ring->adev; - uint64_t mqd_addr, wptr_addr; + int i, r = 0; - mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - amdgpu_ring_alloc(kiq_ring, 8); + if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { + WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + udelay(1); + } + if (i == adev->usec_timeout) + r = -ETIMEDOUT; + } + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, 0x21010000); - amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | - (ring->queue << 26) | - (ring->pipe << 29) | - ((ring->me == 1 ? 
0 : 1) << 31)); /* doorbell */ - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - amdgpu_ring_commit(kiq_ring); - udelay(50); + return r; } static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) @@ -4721,7 +4577,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; - + if (!(adev->flags & AMD_IS_APU)) { + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); + } eop_base_addr = ring->eop_gpu_addr >> 8; mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); @@ -4729,7 +4590,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32(mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; @@ -4741,11 +4602,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_doorbell_control = tmp; - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr = 0; - /* set the pointer to the MQD */ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); @@ -4815,149 +4671,160 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; + /* set MTYPE */ + tmp = RREG32(mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); + mqd->cp_hqd_ib_control = tmp; + + tmp = RREG32(mmCP_HQD_IQ_TIMER); + tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); + mqd->cp_hqd_iq_timer = tmp; + + tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); + mqd->cp_hqd_ctx_save_control = tmp; + + /* defaults */ + mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); + mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); + mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); + mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); + mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); + mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); + mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); + mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); + mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); + mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); + mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); + /* activate the queue */ mqd->cp_hqd_active = 1; return 0; } -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) +int gfx_v8_0_mqd_commit(struct 
amdgpu_device *adev, + struct vi_mqd *mqd) { - struct amdgpu_device *adev = ring->adev; - struct vi_mqd *mqd = ring->mqd_ptr; - int j; + uint32_t mqd_reg; + uint32_t *mqd_data; + + /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ + mqd_data = &mqd->cp_mqd_base_addr_lo; /* disable wptr polling */ WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); - - /* enable doorbell? */ - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + /* program all HQD registers */ + for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* disable the queue if it's active */ - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + /* Tonga errata: EOP RPTR/WPTR should be left unmodified. + * This is safe since EOP RPTR==WPTR for any inactive HQD + * on ASICs that do not support context-save. + * EOP writes/reads can start anywhere in the ring. + */ + if (adev->asic_type != CHIP_TONGA) { + WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); + WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); + WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); } - /* set the pointer to the MQD */ - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); - - /* set the wb address whether it's enabled or not */ - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); + for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); + /* activate the HQD */ + for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* enable the doorbell if requested */ - if (ring->use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); - } - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + return 0; +} - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); +static int 
gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct vi_mqd *mqd = ring->mqd_ptr; + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; - /* set the vmid for the queue */ - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); + gfx_v8_0_kiq_setting(ring); - WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); + if (adev->gfx.in_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); - /* activate the queue */ - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_mqd_commit(adev, mqd); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_mqd_init(ring); + gfx_v8_0_mqd_commit(adev, mqd); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); - if (ring->use_doorbell) - WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); + } return 0; } -static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) +static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct vi_mqd *mqd = ring->mqd_ptr; - bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; - - if (is_kiq) { - gfx_v8_0_kiq_setting(&kiq->ring); - } else { - mqd_idx = ring - &adev->gfx.compute_ring[0]; - } + int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->gfx.in_reset) { - memset((void *)mqd, 0, sizeof(*mqd)); + if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); - if (is_kiq) - gfx_v8_0_kiq_init_register(ring); vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else { /* for GPU_RESET case */ + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); + } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); - - if (is_kiq) { - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v8_0_kiq_init_register(ring); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } + } else { + amdgpu_ring_clear_ring(ring); } - - if (is_kiq) - gfx_v8_0_kiq_enable(ring); - else - 
gfx_v8_0_map_queue_enable(&kiq->ring, ring); - return 0; } +static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) +{ + if (adev->asic_type > CHIP_TONGA) { + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); + } + /* enable doorbells */ + WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); +} + static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; @@ -4981,13 +4848,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) if (r) goto done; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } - for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -4996,272 +4856,41 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); if (!r) { - r = gfx_v8_0_kiq_init_queue(ring); + r = gfx_v8_0_kcq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); if (r) goto done; - - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; } -done: - return r; -} - -static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) -{ - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; - u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct vi_mqd *mqd; - - /* init the queues. */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct vi_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); - return r; - } - - /* init the mqd struct */ - memset(buf, 0, sizeof(struct vi_mqd)); - - mqd = (struct vi_mqd *)buf; - mqd->header = 0xC0310800; - mqd->compute_pipelinestat_enable = 0x00000001; - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - mqd->compute_misc_reserved = 0x00000003; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); - - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - eop_gpu_addr >>= 8; - - /* write the EOP addr */ - WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); - - /* set the VMID assigned */ - WREG32(mmCP_HQD_VMID, 0); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(mmCP_HQD_EOP_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); - WREG32(mmCP_HQD_EOP_CONTROL, tmp); - - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 
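/*
 * The removed lines around here are the body of the old
 * gfx_v8_0_cp_compute_resume() path, which programmed each compute queue's
 * MQD and HQD registers by hand with individual WREG32 calls. With this
 * patch the MQD contents are built once per queue in gfx_v8_0_mqd_init(),
 * then committed to hardware either directly through gfx_v8_0_mqd_commit()
 * (KIQ case) or handed to the KIQ, which maps the compute queues with the
 * MAP_QUEUES packets assembled in gfx_v8_0_kiq_kcq_enable().
 */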
- WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + gfx_v8_0_set_mec_doorbell_range(adev); - mqd->cp_hqd_eop_base_addr_lo = - RREG32(mmCP_HQD_EOP_BASE_ADDR); - mqd->cp_hqd_eop_base_addr_hi = - RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); - - /* enable doorbell? */ - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); - mqd->cp_hqd_pq_doorbell_control = tmp; - - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - } - - /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - tmp = RREG32(mmCP_MQD_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - WREG32(mmCP_MQD_CONTROL, tmp); - mqd->cp_mqd_control = tmp; - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32(mmCP_HQD_PQ_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, - (order_base_2(ring->ring_size / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); -#ifdef __BIG_ENDIAN - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); -#endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - WREG32(mmCP_HQD_PQ_CONTROL, tmp); - mqd->cp_hqd_pq_control = tmp; - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY) || - 
(adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS12)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); - } - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - mqd->cp_hqd_pq_doorbell_control = tmp; - - } else { - mqd->cp_hqd_pq_doorbell_control = 0; - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); - - /* set the vmid for the queue */ - mqd->cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); - - tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); - WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); - mqd->cp_hqd_persistent_state = tmp; - if (adev->asic_type == CHIP_STONEY || - adev->asic_type == CHIP_POLARIS11 || - adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12) { - tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); - WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); - } - - /* activate the queue */ - mqd->cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - } + r = gfx_v8_0_kiq_kcq_enable(adev); + if (r) + goto done; - if (use_doorbell) { - tmp = RREG32(mmCP_PQ_STATUS); - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - WREG32(mmCP_PQ_STATUS, tmp); + /* Test KIQ */ + ring = &adev->gfx.kiq.ring; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; } - gfx_v8_0_cp_compute_enable(adev, true); - + /* Test KCQs */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - + ring = &adev->gfx.compute_ring[i]; ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; } - return 0; +done: + return r; } static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) @@ -5314,10 +4943,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev)) - r = gfx_v8_0_kiq_resume(adev); - else - r = gfx_v8_0_cp_compute_resume(adev); + r = gfx_v8_0_kiq_resume(adev); if (r) return r; @@ -5361,7 +4987,6 @@ static int gfx_v8_0_hw_fini(void *handle) } gfx_v8_0_cp_enable(adev, false); gfx_v8_0_rlc_stop(adev); - gfx_v8_0_cp_compute_fini(adev); amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); @@ -5372,15 +4997,18 @@ static int gfx_v8_0_hw_fini(void *handle) static int gfx_v8_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - + adev->gfx.in_suspend = true; return gfx_v8_0_hw_fini(adev); } static int gfx_v8_0_resume(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return gfx_v8_0_hw_init(adev); + r = 
gfx_v8_0_hw_init(adev); + adev->gfx.in_suspend = false; + return r; } static bool gfx_v8_0_is_idle(void *handle) @@ -5469,25 +5097,6 @@ static bool gfx_v8_0_check_soft_reset(void *handle) } } -static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - int i; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { - WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2); - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; - udelay(1); - } - } - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5517,7 +5126,11 @@ static int gfx_v8_0_pre_soft_reset(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - gfx_v8_0_inactive_hqd(adev, ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_deactivate_hqd(adev, 2); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } /* Disable MEC parsing/prefetching */ gfx_v8_0_cp_compute_enable(adev, false); @@ -5588,18 +5201,6 @@ static int gfx_v8_0_soft_reset(void *handle) return 0; } -static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); - WREG32(mmCP_HQD_PQ_RPTR, 0); - WREG32(mmCP_HQD_PQ_WPTR, 0); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5625,9 +5226,13 @@ static int gfx_v8_0_post_soft_reset(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - gfx_v8_0_init_hqd(adev, ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_deactivate_hqd(adev, 2); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } - gfx_v8_0_cp_compute_resume(adev); + gfx_v8_0_kiq_resume(adev); } gfx_v8_0_rlc_start(adev); @@ -5773,7 +5378,7 @@ static int gfx_v8_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; adev->gfx.funcs = &gfx_v8_0_gfx_funcs; gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); @@ -6265,6 +5870,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); + /* enable interrupts again for PG */ + gfx_v8_0_enable_gui_idle_interrupt(adev, true); } gfx_v8_0_wait_for_rlc_serdes(adev); @@ -6568,9 +6175,13 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vm_id << 24); - if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT) + if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + gfx_v8_0_ring_emit_de_meta(ring); + } + amdgpu_ring_write(ring, header); 
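At this point the patch extends gfx_v8_0_ring_emit_ib_gfx() so that, for an SR-IOV virtual function submitting an IB flagged AMDGPU_IB_FLAG_PREEMPT, the control word gains the INDIRECT_BUFFER_PRE_ENB bit and DE metadata is emitted first unless the IB is a constant-engine (CE) IB. The standalone sketch below only models that flag logic; the flag values and the pre-enable shift are illustrative stand-ins, not the definitions from the amdgpu headers.

/* Standalone model of the preemption-flag handling added to
 * gfx_v8_0_ring_emit_ib_gfx(); the constants are stand-ins for
 * AMDGPU_IB_FLAG_CE, AMDGPU_IB_FLAG_PREEMPT and INDIRECT_BUFFER_PRE_ENB(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IB_FLAG_CE      (1u << 0)              /* stand-in flag value */
#define IB_FLAG_PREEMPT (1u << 1)              /* stand-in flag value */
#define PRE_ENB(x)      ((uint32_t)(x) << 21)  /* stand-in encoding */

static uint32_t build_control(uint32_t length_dw, uint32_t vm_id,
			      uint32_t ib_flags, bool is_sriov_vf,
			      bool *emit_de_meta)
{
	/* control word carries the IB length and the VMID, as in the patch */
	uint32_t control = length_dw | (vm_id << 24);

	*emit_de_meta = false;
	if (is_sriov_vf && (ib_flags & IB_FLAG_PREEMPT)) {
		control |= PRE_ENB(1);
		/* DE metadata precedes every preemptible non-CE IB */
		if (!(ib_flags & IB_FLAG_CE))
			*emit_de_meta = true;
	}
	return control;
}

int main(void)
{
	bool de;
	uint32_t c = build_control(0x40, 3, IB_FLAG_PREEMPT, true, &de);

	printf("control=0x%08x emit_de_meta=%d\n", (unsigned)c, (int)de);
	return 0;
}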
amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -6753,8 +6364,7 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) uint32_t dw2 = 0; if (amdgpu_sriov_vf(ring->adev)) - gfx_v8_0_ring_emit_ce_meta_init(ring, - (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); + gfx_v8_0_ring_emit_ce_meta(ring); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { @@ -6780,10 +6390,6 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, 0); - - if (amdgpu_sriov_vf(ring->adev)) - gfx_v8_0_ring_emit_de_meta_init(ring, - (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); } static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) @@ -6813,7 +6419,6 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; } - static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; @@ -6851,15 +6456,27 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state) { + u32 mec_int_cntl, mec_int_cntl_reg; + /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. */ if (me == 1) { switch (pipe) { case 0: + mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; + break; + case 1: + mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; + break; + case 2: + mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; + break; + case 3: + mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; break; default: DRM_DEBUG("invalid pipe %d\n", pipe); @@ -6870,8 +6487,20 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, return; } - WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, - state == AMDGPU_IRQ_STATE_DISABLE ? 
0 : 1); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } } static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, @@ -6992,8 +6621,6 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, { struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); - switch (type) { case AMDGPU_CP_KIQ_IRQ_DRIVER0: WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, @@ -7023,8 +6650,6 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); - me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; @@ -7257,7 +6882,7 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; } @@ -7268,9 +6893,15 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; memset(cu_info, 0, sizeof(*cu_info)); + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; + amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); @@ -7286,16 +6917,18 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v8_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } mask <<= 1; } active_cu_number += counter; - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -7323,7 +6956,7 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block = .funcs = &gfx_v8_0_ip_funcs, }; -static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) { uint64_t ce_payload_addr; int cnt_ce; @@ -7333,10 +6966,12 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c } ce_payload = {}; if (ring->adev->virt.chained_ib_support) { - ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); + ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; } else { - ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); + ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + + offsetof(struct vi_gfx_meta_data, ce_payload); cnt_ce = 
(sizeof(ce_payload.regular) >> 2) + 4 - 2; } @@ -7350,15 +6985,16 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); } -static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) { - uint64_t de_payload_addr, gds_addr; + uint64_t de_payload_addr, gds_addr, csa_addr; int cnt_de; static union { struct vi_de_ib_state regular; struct vi_de_ib_state_chained_ib chained; } de_payload = {}; + csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; gds_addr = csa_addr + 4096; if (ring->adev->virt.chained_ib_support) { de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); @@ -7381,68 +7017,3 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); } - -/* create MQD for each compute queue */ -static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int r, i; - - /* create MQD for KIQ */ - ring = &adev->gfx.kiq.ring; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, &ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) - dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); - } - - /* create MQD for each KCQ */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, &ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[i]) - dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); - } - } - - return 0; -} - -static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int i; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - kfree(adev->gfx.mec.mqd_backup[i]); - amdgpu_bo_free_kernel(&ring->mqd_obj, - &ring->mqd_gpu_addr, - &ring->mqd_ptr); - } - - ring = &adev->gfx.kiq.ring; - kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); - amdgpu_bo_free_kernel(&ring->mqd_obj, - &ring->mqd_gpu_addr, - &ring->mqd_ptr); -} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h index 788cc3ab584b..ec3f11fa986c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h @@ -27,4 +27,9 @@ extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block; extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block; +struct amdgpu_device; +struct vi_mqd; + +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0c16b7563b73..69182eeca264 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "soc15.h" @@ -38,8 +38,17 @@ #include "v9_structs.h" #define GFX9_NUM_GFX_RINGS 1 -#define GFX9_NUM_COMPUTE_RINGS 8 -#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 +#define GFX9_MEC_HPD_SIZE 2048 +#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L +#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 + +#define mmPWR_MISC_CNTL_STATUS 0x0183 +#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 +#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 +#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 +#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L +#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); @@ -48,6 +57,13 @@ MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/raven_ce.bin"); +MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); +MODULE_FIRMWARE("amdgpu/raven_me.bin"); +MODULE_FIRMWARE("amdgpu/raven_mec.bin"); +MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); +MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); + static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), @@ -86,14 +102,29 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = static const u32 golden_settings_gc_9_0[] = { - SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400, + SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, + SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), 0x00001000, 0x00001000, + SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107, + SOC15_REG_OFFSET(GC, 0, mmSQC_CONFIG), 0x03000000, 0x020a2000, SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, - SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff + SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, + SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff, + SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 }; static const u32 golden_settings_gc_9_0_vg10[] = @@ -104,11 +135,47 @@ static const 
u32 golden_settings_gc_9_0_vg10[] = SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, - SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800, - SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007 + SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800 +}; + +static const u32 golden_settings_gc_9_1[] = +{ + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104, + SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, + SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120, + SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, + SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff, + SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 +}; + +static const u32 golden_settings_gc_9_1_rv1[] = +{ + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000, + SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, + SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800 }; #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 +#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); @@ -118,6 +185,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { @@ -130,6 +198,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_0_vg10, (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; + case CHIP_RAVEN: + amdgpu_program_register_sequence(adev, + golden_settings_gc_9_1, + (const u32)ARRAY_SIZE(golden_settings_gc_9_1)); + amdgpu_program_register_sequence(adev, + 
golden_settings_gc_9_1_rv1, + (const u32)ARRAY_SIZE(golden_settings_gc_9_1_rv1)); + break; default: break; } @@ -137,7 +213,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -284,6 +360,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; DRM_DEBUG("\n"); @@ -291,6 +370,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA10: chip_name = "vega10"; break; + case CHIP_RAVEN: + chip_name = "raven"; + break; default: BUG(); } @@ -333,9 +415,46 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -447,259 +566,322 @@ out: return err; } -static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) +static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) { - int r; + u32 count = 0; + const struct 
cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; - if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; - amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); - adev->gfx.mec.hpd_eop_obj = NULL; + for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } } - if (adev->gfx.mec.mec_fw_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); - amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); - amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj); - adev->gfx.mec.mec_fw_obj = NULL; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); } -#define MEC_HPD_SIZE 2048 +static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) +{ + uint32_t data; -static int gfx_v9_0_mec_init(struct amdgpu_device *adev) + /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); + + /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); + + /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); + + mutex_lock(&adev->grbm_idx_mutex); + /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); + + /* set mmRLC_LB_PARAMS = 0x003F_1006 */ + data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); + data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); + data |= REG_SET_FIELD(data, 
RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); + WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); + + /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ + data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); + data &= 0x0000FFFF; + data |= 0x00C00000; + WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); + + /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */ + WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF); + + /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, + * but used for RLC_LB_CNTL configuration */ + data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; + data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); + data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); + WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) { - int r; - u32 *hpd; - const __le32 *fw_data; - unsigned fw_size; - u32 *fw; + WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); +} - const struct gfx_firmware_header_v1_0 *mec_hdr; +static void rv_init_cp_jump_table(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + volatile u32 *dst_ptr; + int me, i, max_me = 5; + u32 bo_offset = 0; + u32 table_offset, table_size; + + /* write the cp table buffer */ + dst_ptr = adev->gfx.rlc.cp_table_ptr; + for (me = 0; me < max_me; me++) { + if (me == 0) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + fw_data = (const __le32 *) + (adev->gfx.ce_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 1) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + fw_data = (const __le32 *) + (adev->gfx.pfp_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 2) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + fw_data = (const __le32 *) + (adev->gfx.me_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 3) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 4) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec2_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; - - if (adev->gfx.mec.hpd_eop_obj == NULL) { - r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.hpd_eop_obj); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - 
return r; + for (i = 0; i < table_size; i ++) { + dst_ptr[bo_offset + i] = + cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); } - } - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); - if (unlikely(r != 0)) { - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.hpd_eop_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); - gfx_v9_0_mec_fini(adev); - return r; + bo_offset += table_size; } +} - memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); +static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev) +{ + /* clear state block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); +static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) +{ + volatile u32 *dst_ptr; + u32 dws; + const struct cs_section_def *cs_data; + int r; - mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.rlc.cs_data = gfx9_cs_data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; + cs_data = adev->gfx.rlc.cs_data; - if (adev->gfx.mec.mec_fw_obj == NULL) { - r = amdgpu_bo_create(adev, - mec_hdr->header.ucode_size_bytes, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.mec_fw_obj); + if (cs_data) { + /* clear state block */ + adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); if (r) { - dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); + dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", + r); + gfx_v9_0_rlc_fini(adev); return r; } + /* set up the cs buffer */ + dst_ptr = adev->gfx.rlc.cs_ptr; + gfx_v9_0_get_csb_buffer(adev, dst_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } - r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); - if (unlikely(r != 0)) { - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.mec_fw_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r); - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw); - if (r) { - dev_warn(adev->dev, "(%d) map firmware bo failed\n", r); - gfx_v9_0_mec_fini(adev); - return r; - } - memcpy(fw, fw_data, fw_size); + if (adev->asic_type == CHIP_RAVEN) { + /* TODO: double check the cp_table_size for RV */ + adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); + if (r) { + 
dev_err(adev->dev, + "(%d) failed to create cp table bo\n", r); + gfx_v9_0_rlc_fini(adev); + return r; + } - amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); - amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + rv_init_cp_jump_table(adev); + amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); + gfx_v9_0_init_lbpw(adev); + } return 0; } -static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev) +static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); } -static int gfx_v9_0_kiq_init(struct amdgpu_device *adev) +static int gfx_v9_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, - &kiq->eop_gpu_addr, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); - return r; - } - - memset(hpd, 0, MEC_HPD_SIZE); + const __le32 *fw_data; + unsigned fw_size; + u32 *fw; + size_t mec_hpd_size; - r = amdgpu_bo_reserve(kiq->eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); - amdgpu_bo_kunmap(kiq->eop_obj); - amdgpu_bo_unreserve(kiq->eop_obj); + const struct gfx_firmware_header_v1_0 *mec_hdr; - return 0; -} + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); -static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - int r = 0; + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); - if (r) + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v9_0_mec_fini(adev); return r; - - ring->adev = NULL; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; } - ring->queue = 0; - ring->eop_gpu_addr = kiq->eop_gpu_addr; - sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_init(adev, ring, 1024, - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); - if (r) - dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); - return r; -} -static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); - amdgpu_ring_fini(ring); -} + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); -/* create MQD for each compute queue */ -static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int r, i; + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - /* create MQD for KIQ */ - ring = &adev->gfx.kiq.ring; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, - 
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; - /*TODO: prepare MQD backup */ + r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw); + if (r) { + dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; } - /* create MQD for each KCQ */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } + memcpy(fw, fw_data, fw_size); - /* TODO: prepare MQD backup */ - } - } + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); return 0; } -static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int i; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); - } - - ring = &adev->gfx.kiq.ring; - amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); -} - static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) { WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, @@ -770,23 +952,21 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: - adev->gfx.config.max_shader_engines = 4; - adev->gfx.config.max_cu_per_sh = 16; - adev->gfx.config.max_sh_per_se = 1; - adev->gfx.config.max_backends_per_se = 4; - adev->gfx.config.max_texture_channel_caches = 16; - adev->gfx.config.max_gprs = 256; - adev->gfx.config.max_gs_threads = 32; adev->gfx.config.max_hw_contexts = 8; - adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; - adev->gfx.config.gs_vgt_table_depth = 32; - adev->gfx.config.gs_prim_buffer_depth = 1792; gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; break; + case CHIP_RAVEN: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; + break; default: BUG(); break; @@ -1023,13 +1203,61 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) return 0; } +static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 
1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX9_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v9_0_sw_init(void *handle) { - int i, r; + int i, j, k, r, ring_id; struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_RAVEN: + adev->gfx.mec.num_mec = 2; + break; + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + /* KIQ event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); if (r) @@ -1062,6 +1290,12 @@ static int gfx_v9_0_sw_init(void *handle) return r; } + r = gfx_v9_0_rlc_init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + r = gfx_v9_0_mec_init(adev); if (r) { DRM_ERROR("Failed to init MEC BOs!\n"); @@ -1081,49 +1315,40 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v9_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); - if (r) - return r; } - if (amdgpu_sriov_vf(adev)) { - r = gfx_v9_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } - kiq = &adev->gfx.kiq; - r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); - if (r) - return r; + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = gfx_v9_0_compute_mqd_sw_init(adev); - if (r) - return r; - } + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd)); + if (r) + return r; /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, @@ -1170,11 +1395,9 @@ static int 
gfx_v9_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - if (amdgpu_sriov_vf(adev)) { - gfx_v9_0_compute_mqd_sw_fini(adev); - gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); - gfx_v9_0_kiq_fini(adev); - } + amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); gfx_v9_0_ngg_fini(adev); @@ -1190,27 +1413,24 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) { - u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); + u32 data; - if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - } else if (se_num == 0xffffffff) { - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); + + if (se_num == 0xffffffff) data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - } else if (sh_num == 0xffffffff) { - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); + else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); - } else { + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); + else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); - } - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); -} -static u32 gfx_v9_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); } static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -1223,8 +1443,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -1272,7 +1492,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) sh_mem_config = SH_MEM_ADDRESS_MODE_64 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; mutex_lock(&adev->srbm_mutex); for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { @@ -1370,9 +1590,6 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, { u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); - if (enable) - return; - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 
1 : 0); @@ -1381,6 +1598,373 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } +static void gfx_v9_0_init_csb(struct amdgpu_device *adev) +{ + /* csib */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + adev->gfx.rlc.clear_state_size); +} + +static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, + int indirect_offset, + int list_size, + int *unique_indirect_regs, + int *unique_indirect_reg_count, + int max_indirect_reg_count, + int *indirect_start_offsets, + int *indirect_start_offsets_count, + int max_indirect_start_offsets_count) +{ + int idx; + bool new_entry = true; + + for (; indirect_offset < list_size; indirect_offset++) { + + if (new_entry) { + new_entry = false; + indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; + *indirect_start_offsets_count = *indirect_start_offsets_count + 1; + BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); + } + + if (register_list_format[indirect_offset] == 0xFFFFFFFF) { + new_entry = true; + continue; + } + + indirect_offset += 2; + + /* look for the matching indice */ + for (idx = 0; idx < *unique_indirect_reg_count; idx++) { + if (unique_indirect_regs[idx] == + register_list_format[indirect_offset]) + break; + } + + if (idx >= *unique_indirect_reg_count) { + unique_indirect_regs[*unique_indirect_reg_count] = + register_list_format[indirect_offset]; + idx = *unique_indirect_reg_count; + *unique_indirect_reg_count = *unique_indirect_reg_count + 1; + BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); + } + + register_list_format[indirect_offset] = idx; + } +} + +static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) +{ + int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + int unique_indirect_reg_count = 0; + + int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + int indirect_start_offsets_count = 0; + + int list_size = 0; + int i = 0; + u32 tmp = 0; + + u32 *register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + if (!register_list_format) + return -ENOMEM; + memcpy(register_list_format, adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes); + + /* setup unique_indirect_regs array and indirect_start_offsets array */ + gfx_v9_0_parse_ind_reg_list(register_list_format, + GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indirect_regs, + &unique_indirect_reg_count, + sizeof(unique_indirect_regs)/sizeof(int), + indirect_start_offsets, + &indirect_start_offsets_count, + sizeof(indirect_start_offsets)/sizeof(int)); + + /* enable auto inc in case it is disabled */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); + + /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), + RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); + for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), + adev->gfx.rlc.register_restore[i]); + + /* load direct register */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 
0); + for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), + adev->gfx.rlc.register_restore[i]); + + /* load indirect register */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), + adev->gfx.rlc.reg_list_format_start); + for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), + register_list_format[i]); + + /* set save/restore list size */ + list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; + list_size = list_size >> 1; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), + adev->gfx.rlc.reg_restore_list_size); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); + + /* write the starting offsets to RLC scratch ram */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), + adev->gfx.rlc.starting_offsets_start); + for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), + indirect_start_offsets[i]); + + /* load unique indirect regs*/ + for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, + unique_indirect_regs[i] & 0x3FFFF); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, + unique_indirect_regs[i] >> 20); + } + + kfree(register_list_format); + return 0; +} + +static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) +{ + u32 tmp = 0; + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); +} + +static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); + if (enable == true) { + /* enable GFXIP control over CGPG */ + data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); + + /* update status */ + data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; + data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); + if(default_data != data) + WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); + } else { + /* restore GFXIP control over GCPG */ + data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); + } +} + +static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG)) { + /* init IDLE_POLL_COUNT = 60 */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; + data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + + /* init RLC PG Delay */ + data = 0; + data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); + data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); + data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; + data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); + 
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); + data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; + data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); + data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; + + /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ + data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); + + pwr_10_0_gfxip_control_over_cgpg(adev, true); + } +} + +static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + + if (enable == true) { + data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; + if (default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } else { + data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } +} + +static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + + if (enable == true) { + data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } else { + data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } +} + +static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + + if (enable == true) { + data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } else { + data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } +} + +static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); +} + +static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + + if (!enable) + /* read any GFX register to wake up GFX */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); +} + +static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= 
RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); +} + +static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); +} + +static void gfx_v9_0_init_pg(struct amdgpu_device *adev) +{ + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GDS | + AMD_PG_SUPPORT_RLC_SMU_HS)) { + gfx_v9_0_init_csb(adev); + gfx_v9_0_init_rlc_save_restore_list(adev); + gfx_v9_0_enable_save_restore_machine(adev); + + if (adev->asic_type == CHIP_RAVEN) { + WREG32(mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); + gfx_v9_0_init_gfx_power_gating(adev); + + if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); + } else { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_CP) + gfx_v9_0_enable_cp_power_gating(adev, true); + else + gfx_v9_0_enable_cp_power_gating(adev, false); + } + } +} + void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); @@ -1425,7 +2009,7 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) * default is 0x9C4 to create a 100us interval */ WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr - * to disable the page fault retry interrupts, default is + * to disable the page fault retry interrupts, default is * 0x100 (256) */ WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); } @@ -1474,6 +2058,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) gfx_v9_0_rlc_reset(adev); + gfx_v9_0_init_pg(adev); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* legacy rlc firmware loading */ r = gfx_v9_0_rlc_load_microcode(adev); @@ -1481,6 +2067,13 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return r; } + if (adev->asic_type == CHIP_RAVEN) { + if (amdgpu_lbpw != 0) + gfx_v9_0_enable_lbpw(adev, true); + else + gfx_v9_0_enable_lbpw(adev, false); + } + gfx_v9_0_rlc_start(adev); return 0; @@ -1559,41 +2152,12 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) return 0; } -static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) -{ - u32 count = 0; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; - - /* begin clear state */ - count += 2; - /* context control state */ - count += 3; - - for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) - count += 2 + ext->reg_count; - else - return 0; - } - } - /* pa_sc_raster_config/pa_sc_raster_config1 */ - count += 4; - /* end clear state */ - count += 2; - /* clear state */ - count += 2; - - return count; -} - static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; const struct cs_section_def *sect = NULL; const 
struct cs_extent_def *ext = NULL; - int r, i; + int r, i, tmp; /* init the CP */ WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); @@ -1601,7 +2165,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); - r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); + r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); return r; @@ -1639,6 +2203,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, 0x8000); amdgpu_ring_write(ring, 0x8000); + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); + tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | + (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0); + amdgpu_ring_commit(ring); return 0; @@ -1730,13 +2300,6 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) udelay(50); } -static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev) -{ - gfx_v9_0_cp_compute_enable(adev, true); - - return 0; -} - static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) { const struct gfx_firmware_header_v1_0 *mec_hdr; @@ -1764,7 +2327,7 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); - + /* MEC1 */ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, mec_hdr->jt_offset); @@ -1779,45 +2342,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) return 0; } -static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - } - } -} - -static int gfx_v9_0_init_queue(struct amdgpu_ring *ring); - -static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev) -{ - int i, r; - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (gfx_v9_0_init_queue(ring)) - dev_warn(adev->dev, "compute queue %d init failed!\n", i); - } - - r = gfx_v9_0_cp_compute_start(adev); - if (r) - return r; - - return 0; -} - /* KIQ functions */ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) { @@ -1833,51 +2357,95 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring) +static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) { - amdgpu_ring_alloc(ring, 8); - /* set resources */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ - amdgpu_ring_write(ring, 0); /* queue mask hi */ - amdgpu_ring_write(ring, 0); /* gws mask lo */ - amdgpu_ring_write(ring, 0); /* gws mask hi */ - amdgpu_ring_write(ring, 0); /* oac mask */ - amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ - amdgpu_ring_commit(ring); - udelay(50); -} + struct amdgpu_ring *kiq_ring 
= &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + uint64_t queue_mask = 0; + int r, i; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i >= (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } -static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring, - struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = kiq_ring->adev; - uint64_t mqd_addr, wptr_addr; - - mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - amdgpu_ring_alloc(kiq_ring, 8); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - (0 << 4) | /* Queue_Sel */ - (0 << 8) | /* VMID */ - (ring->queue << 13 ) | - (ring->pipe << 16) | - ((ring->me == 1 ? 0 : 1) << 18) | - (0 << 21) | /*queue_type: normal compute queue */ - (1 << 24) | /* alloc format: all_on_one_pipe */ - (0 << 26) | /* engine_sel: compute */ - (1 << 29)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + /* set resources */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + } + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); amdgpu_ring_commit(kiq_ring); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; } static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) @@ -1902,7 +2470,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); + (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; @@ -2119,47 +2687,69 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct v9_mqd *mqd = ring->mqd_ptr; - bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; - if (is_kiq) { - gfx_v9_0_kiq_setting(&kiq->ring); + gfx_v9_0_kiq_setting(ring); + + if (adev->gfx.in_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_0_kiq_init_register(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } else { - mqd_idx = ring - &adev->gfx.compute_ring[0]; + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_0_mqd_init(ring); + gfx_v9_0_kiq_init_register(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } - if (!adev->gfx.in_reset) { + return 0; +} + +static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + + if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v9_0_mqd_init(ring); - if (is_kiq) - gfx_v9_0_kiq_init_register(ring); soc15_grbm_select(adev, 0, 0, 0, 0); 
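/*
 * A condensed sketch (not part of the patch) of the completion handshake used
 * by gfx_v9_0_kiq_kcq_enable() above. The ring allocation there,
 * (7 * num_compute_rings) + 11 dwords, covers one 8-dword SET_RESOURCES
 * packet, a 7-dword MAP_QUEUES packet per compute ring, and the 3-dword
 * SET_UCONFIG_REG write shown here: the KIQ writes 0xDEADBEEF to a scratch
 * register once the preceding packets have been processed, and the driver
 * polls for it. Only helpers that appear elsewhere in this diff are used.
 */
static int example_kiq_completion_handshake(struct amdgpu_device *adev,
					    struct amdgpu_ring *kiq_ring)
{
	uint32_t scratch;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;
	WREG32(scratch, 0xCAFEDEAD);			/* sentinel: not done yet */

	r = amdgpu_ring_alloc(kiq_ring, 3);
	if (r)
		goto out;
	/* ... SET_RESOURCES / MAP_QUEUES packets would be emitted here ... */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (RREG32(scratch) == 0xDEADBEEF)	/* KIQ reached our marker */
			break;
		DRM_UDELAY(1);
	}
	r = (i < adev->usec_timeout) ? 0 : -EINVAL;
out:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}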
mutex_unlock(&adev->srbm_mutex); - } else { /* for GPU_RESET case */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; - - if (is_kiq) { - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } + amdgpu_ring_clear_ring(ring); + } else { + amdgpu_ring_clear_ring(ring); } - if (is_kiq) - gfx_v9_0_kiq_enable(ring); - else - gfx_v9_0_map_queue_enable(&kiq->ring, ring); - return 0; } @@ -2194,7 +2784,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_0_kiq_init_queue(ring); + r = gfx_v9_0_kcq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -2203,13 +2793,14 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) goto done; } + r = gfx_v9_0_kiq_kcq_enable(adev); done: return r; } static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) { - int r,i; + int r, i; struct amdgpu_ring *ring; if (!(adev->flags & AMD_IS_APU)) @@ -2230,10 +2821,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev)) - r = gfx_v9_0_kiq_resume(adev); - else - r = gfx_v9_0_cp_compute_resume(adev); + r = gfx_v9_0_kiq_resume(adev); if (r) return r; @@ -2243,6 +2831,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) ring->ready = false; return r; } + + ring = &adev->gfx.kiq.ring; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) + ring->ready = false; + for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -2252,14 +2847,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) ring->ready = false; } - if (amdgpu_sriov_vf(adev)) { - ring = &adev->gfx.kiq.ring; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; - } - gfx_v9_0_enable_gui_idle_interrupt(adev, true); return 0; @@ -2307,7 +2894,6 @@ static int gfx_v9_0_hw_fini(void *handle) } gfx_v9_0_cp_enable(adev, false); gfx_v9_0_rlc_stop(adev); - gfx_v9_0_cp_compute_fini(adev); return 0; } @@ -2316,14 +2902,18 @@ static int gfx_v9_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.in_suspend = true; return gfx_v9_0_hw_fini(adev); } static int gfx_v9_0_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; - return gfx_v9_0_hw_init(adev); + r = gfx_v9_0_hw_init(adev); + adev->gfx.in_suspend = false; + return r; } static bool gfx_v9_0_is_idle(void *handle) @@ -2470,7 +3060,7 @@ static int gfx_v9_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); @@ -2549,6 +3139,43 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) } } +static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + /* TODO: double check if we need to 
perform under safe mdoe */ + /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { + gfx_v9_0_enable_gfx_cg_power_gating(adev, true); + if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) + gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); + } else { + gfx_v9_0_enable_gfx_cg_power_gating(adev, false); + gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); + } + + /* gfx_v9_0_exit_rlc_safe_mode(adev); */ +} + +static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + /* TODO: double check if we need to perform under safe mode */ + /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) + gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); + else + gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); + + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) + gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); + else + gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); + + /* gfx_v9_0_exit_rlc_safe_mode(adev); */ +} + static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -2739,6 +3366,34 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { static int gfx_v9_0_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + + switch (adev->asic_type) { + case CHIP_RAVEN: + if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); + } else { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_CP) + gfx_v9_0_enable_cp_power_gating(adev, true); + else + gfx_v9_0_enable_cp_power_gating(adev, false); + + /* update gfx cgpg state */ + gfx_v9_0_update_gfx_cg_power_gating(adev, enable); + + /* update mgcg state */ + gfx_v9_0_update_gfx_mg_power_gating(adev, enable); + break; + default: + break; + } + return 0; } @@ -2752,6 +3407,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); break; @@ -2879,31 +3535,33 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vm_id, bool ctx_switch) { - u32 header, control = 0; + u32 header, control = 0; - if (ib->flags & AMDGPU_IB_FLAG_CE) - header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); - else - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + if (ib->flags & AMDGPU_IB_FLAG_CE) + header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vm_id << 24); - if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) - control |= INDIRECT_BUFFER_PRE_ENB(1); + if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + control |= INDIRECT_BUFFER_PRE_ENB(1); - amdgpu_ring_write(ring, header); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ - amdgpu_ring_write(ring, + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + gfx_v9_0_ring_emit_de_meta(ring); + } + + amdgpu_ring_write(ring, header); +BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN - (2 << 0) | + (2 << 0) | #endif - lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, control); + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); } -#define INDIRECT_BUFFER_VALID (1 << 23) - static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vm_id, bool ctx_switch) @@ -2971,9 +3629,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; gfx_v9_0_write_data_to_reg(ring, usepfp, true, hub->ctx0_ptb_addr_lo32 + (2 * vm_id), @@ -3130,9 +3787,6 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, 0); - - if (amdgpu_sriov_vf(ring->adev)) - gfx_v9_0_ring_emit_de_meta(ring); } static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) @@ -3160,6 +3814,12 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne ring->ring[offset] = (ring->ring_size>>2) - offset + cur; } +static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ +} + static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; @@ -3208,8 +3868,8 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, u32 mec_int_cntl, mec_int_cntl_reg; /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. 
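/*
 * Reading of the emit_tmz hook added above (hedged; its callers are outside
 * this hunk): gfx_v9_0_ring_emit_tmz(ring, true) emits PACKET3_FRAME_CONTROL
 * with FRAME_CMD(0) to mark the start of a protected (TMZ) frame, and
 * gfx_v9_0_ring_emit_tmz(ring, false) emits FRAME_CMD(1) to mark its end, so
 * a protected submission is expected to be bracketed by the two calls.
 */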
*/ @@ -3218,6 +3878,15 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, case 0: mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + break; + case 2: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + break; + case 3: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + break; default: DRM_DEBUG("invalid pipe %d\n", pipe); return; @@ -3433,7 +4102,7 @@ static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev, return 0; } -const struct amd_ip_funcs gfx_v9_0_ip_funcs = { +static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, .late_init = gfx_v9_0_late_init, @@ -3494,6 +4163,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, + .emit_tmz = gfx_v9_0_ring_emit_tmz, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -3605,6 +4275,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; default: @@ -3640,6 +4311,20 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) } } +static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, + u32 bitmap) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + + WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); +} + static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -3650,7 +4335,7 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; - mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return (~data) & mask; } @@ -3660,11 +4345,12 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + unsigned disable_masks[4 * 2]; if (!adev || !cu_info) return -EINVAL; - memset(cu_info, 0, sizeof(*cu_info)); + amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -3673,19 +4359,24 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, ao_bitmap = 0; counter = 0; gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + if (i < 4 && j < 2) + gfx_v9_0_set_user_cu_inactive_bitmap( + adev, disable_masks[i * 2 + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < adev->gfx.config.max_cu_per_sh) ao_bitmap |= mask; counter ++; } mask <<= 1; } active_cu_number += counter; - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -3697,218 +4388,6 @@ static int gfx_v9_0_get_cu_info(struct 
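/*
 * Notes on the CU accounting in gfx_v9_0_get_cu_info() above: the hardware
 * reports *inactive* CUs, so the per-SH active bitmap is the inverted
 * INACTIVE_CUS field masked to max_cu_per_sh bits; the always-on candidates
 * collected in ao_bitmap are then packed into ao_cu_mask at bit offset
 * (se * 16 + sh * 8), with only the first two shader engines and shader
 * arrays contributing ("i < 2 && j < 2").
 */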
amdgpu_device *adev, return 0; } -static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) -{ - int r, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; - u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct v9_mqd *mqd; - struct amdgpu_device *adev; - - adev = ring->adev; - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct v9_mqd), - PAGE_SIZE,true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v9_0_cp_compute_fini(adev); - return r; - } - - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v9_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v9_0_cp_compute_fini(adev); - return r; - } - - /* init the mqd struct */ - memset(buf, 0, sizeof(struct v9_mqd)); - - mqd = (struct v9_mqd *)buf; - mqd->header = 0xC0310800; - mqd->compute_pipelinestat_enable = 0x00000001; - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - mqd->compute_misc_reserved = 0x00000003; - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); - /* disable wptr polling */ - WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - - /* write the EOP addr */ - BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */ - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); - eop_gpu_addr >>= 8; - - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr)); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); - mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); - mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp); - - /* enable doorbell? 
*/ - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - else - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); - - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); - mqd->cp_hqd_pq_doorbell_control = tmp; - - /* disable the queue if it's active */ - ring->wptr = 0; - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr_lo = 0; - mqd->cp_hqd_pq_wptr_hi = 0; - if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); - } - - /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp); - mqd->cp_mqd_control = tmp; - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, - (order_base_2(ring->ring_size / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); -#ifdef __BIG_ENDIAN - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); -#endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp); - mqd->cp_hqd_pq_control = tmp; - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, - mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* enable the doorbell if requested */ - if 
(use_doorbell) { - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, - (AMDGPU_DOORBELL64_KIQ * 2) << 2); - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - mqd->cp_hqd_pq_doorbell_control = tmp; - - } else { - mqd->cp_hqd_pq_doorbell_control = 0; - } - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); - - /* set the vmid for the queue */ - mqd->cp_hqd_vmid = 0; - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp); - mqd->cp_hqd_persistent_state = tmp; - - /* activate the queue */ - mqd->cp_hqd_active = 1; - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - - soc15_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - if (use_doorbell) - WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - - return 0; -} - const struct amdgpu_ip_block_version gfx_v9_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h index 56ef652a575d..fa5a3fbaf6ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -24,7 +24,6 @@ #ifndef __GFX_V9_0_H__ #define __GFX_V9_0_H__ -extern const struct amd_ip_funcs gfx_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 005075ff00f7..6c8040e616c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -31,178 +31,163 @@ #include "soc15_common.h" -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) { - u32 tmp; - u64 value; - u32 i; + return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; +} - /* Program MC. 
*/ - /* Update configuration */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), - adev->mc.vram_start >> 18); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), - adev->mc.vram_end >> 18); +static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) +{ + uint64_t value; - value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + adev->vm_manager.vram_base_offset; - WREG32(SOC15_REG_OFFSET(GC, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), - (u32)(value >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), - (u32)(value >> 44)); + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /*valid bit*/ - if (amdgpu_sriov_vf(adev)) { - /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so - vbios post doesn't program them, for SRIOV driver need to program them */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE), - adev->mc.vram_start >> 24); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP), - adev->mc.vram_end >> 24); - } + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); + + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); +} + +static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + gfxhub_v1_0_init_gart_pt_regs(adev); + + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->mc.gart_start >> 12)); + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->mc.gart_start >> 44)); + + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->mc.gart_end >> 12)); + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->mc.gart_end >> 44)); +} + +static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; /* Disable AGP. */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF); - /* GART Enable. */ + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->mc.vram_start >> 18); + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->mc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
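/*
 * The page-table base programmed by gfxhub_v1_0_init_gart_pt_regs() above is
 * an MC-space address: the CPU-side table address is rebased from VRAM start
 * into the MC aperture via vram_base_offset, masked to the 47:12 address
 * bits (0x0000FFFFFFFFF000ULL) and tagged with bit 0 as the valid bit before
 * being split across the LO32/HI32 registers. Likewise, the new
 * gfxhub_v1_0_get_mc_fb_offset() shifts MC_VM_FB_OFFSET left by 24 because
 * the register holds the frame-buffer offset in 16 MB units.
 */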
*/ + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page.addr >> 12)); + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page.addr >> 44)); + + WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + +static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup TLB control */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_ACCESS_MODE, - 3); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, - 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_APERTURE_UNMAPPED_ACCESS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - MTYPE, - MTYPE_UC);/* XXX for emulation. */ - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ATC_EN, - 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup L2 cache */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - ENABLE_L2_FRAGMENT_PROCESSING, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - L2_PDE0_CACHE_TAG_GENERATION_MODE, - 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - CONTEXT1_IDENTITY_ACCESS_MODE, - 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - IDENTITY_MODE_FRAGMENT_SIZE, - 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2)); + tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp); - - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4)); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - VMC_TAP_PDE_REQUEST_PHYSICAL, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - VMC_TAP_PTE_REQUEST_PHYSICAL, - 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp); - - /* setup 
context0 */ - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), - (u32)(adev->mc.gtt_start >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), - (u32)(adev->mc.gtt_start >> 44)); - - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), - (u32)(adev->mc.gtt_end >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), - (u32)(adev->mc.gtt_end >> 44)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); + + tmp = mmVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp); +} - BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->mc.vram_start - + adev->vm_manager.vram_base_offset; - value &= 0x0000FFFFFFFFF000ULL; - value |= 0x1; /*valid bit*/ +static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), - (u32)value); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), - (u32)(value >> 32)); - - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), - (u32)(adev->dummy_page.addr >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), - (u32)((u64)adev->dummy_page.addr >> 44)); - - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, - 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); - - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp); + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp); +} - /* Disable identity aperture.*/ - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); +static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0XFFFFFFFF); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + +} + +static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + 
i); + tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -223,15 +208,55 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, adev->vm_manager.block_size - 9); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, upper_32_bits(adev->vm_manager.max_pfn - 1)); } +} + +static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} + +int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, + adev->mc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, + adev->mc.vram_end >> 24); + } + /* GART Enable. 
*/ + gfxhub_v1_0_init_gart_aperture_regs(adev); + gfxhub_v1_0_init_system_aperture_regs(adev); + gfxhub_v1_0_init_tlb_regs(adev); + gfxhub_v1_0_init_cache_regs(adev); + + gfxhub_v1_0_enable_system_domain(adev); + gfxhub_v1_0_disable_identity_aperture(adev); + gfxhub_v1_0_setup_vmid_config(adev); + gfxhub_v1_0_program_invalidation(adev); return 0; } @@ -243,22 +268,20 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0); /* Setup TLB control */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0); + WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0); } /** @@ -271,7 +294,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, @@ -296,22 +319,11 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); -} - -static int gfxhub_v1_0_early_init(void *handle) -{ - return 0; + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } -static int gfxhub_v1_0_late_init(void *handle) +void gfxhub_v1_0_init(struct amdgpu_device *adev) { - return 0; -} - -static int gfxhub_v1_0_sw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; hub->ctx0_ptb_addr_lo32 = @@ -330,96 +342,4 @@ static int gfxhub_v1_0_sw_init(void *handle) SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); - - return 0; -} - -static int gfxhub_v1_0_sw_fini(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_hw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - unsigned i; - - for (i = 0 ; i < 18; ++i) { - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + - 2 * i, 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + - 2 * i, 0x1f); - } - - return 0; -} - -static int gfxhub_v1_0_hw_fini(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_suspend(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_resume(void *handle) -{ - return 0; -} - -static bool gfxhub_v1_0_is_idle(void *handle) -{ - return true; } - -static int gfxhub_v1_0_wait_for_idle(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_soft_reset(void *handle) 
-{ - return 0; -} - -static int gfxhub_v1_0_set_clockgating_state(void *handle, - enum amd_clockgating_state state) -{ - return 0; -} - -static int gfxhub_v1_0_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - return 0; -} - -const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = { - .name = "gfxhub_v1_0", - .early_init = gfxhub_v1_0_early_init, - .late_init = gfxhub_v1_0_late_init, - .sw_init = gfxhub_v1_0_sw_init, - .sw_fini = gfxhub_v1_0_sw_fini, - .hw_init = gfxhub_v1_0_hw_init, - .hw_fini = gfxhub_v1_0_hw_fini, - .suspend = gfxhub_v1_0_suspend, - .resume = gfxhub_v1_0_resume, - .is_idle = gfxhub_v1_0_is_idle, - .wait_for_idle = gfxhub_v1_0_wait_for_idle, - .soft_reset = gfxhub_v1_0_soft_reset, - .set_clockgating_state = gfxhub_v1_0_set_clockgating_state, - .set_powergating_state = gfxhub_v1_0_set_powergating_state, -}; - -const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_GFXHUB, - .major = 1, - .minor = 0, - .rev = 0, - .funcs = &gfxhub_v1_0_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 5129a8ff0932..206e29cad753 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -28,8 +28,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); - -extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; -extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; +void gfxhub_v1_0_init(struct amdgpu_device *adev); +u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index d860939152df..5be9c83dfcf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "gmc_v6_0.h" #include "amdgpu_ucode.h" @@ -66,14 +66,10 @@ static const u32 crtc_offsets[6] = SI_CRTC5_REGISTER_OFFSET }; -static void gmc_v6_0_mc_stop(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v6_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; - if (adev->mode_info.num_crtc) - amdgpu_display_stop_mc_access(adev, save); - gmc_v6_0_wait_for_idle((void *)adev); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); @@ -90,8 +86,7 @@ static void gmc_v6_0_mc_stop(struct amdgpu_device *adev, } -static void gmc_v6_0_mc_resume(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v6_0_mc_resume(struct amdgpu_device *adev) { u32 tmp; @@ -103,10 +98,6 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev, tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1); tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1); WREG32(mmBIF_FB_EN, tmp); - - if (adev->mode_info.num_crtc) - amdgpu_display_resume_mc_access(adev, save); - } static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) @@ -228,20 +219,20 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev) static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { + u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + if (mc->mc_vram_size > 0xFFC0000000ULL) { dev_warn(adev->dev, "limiting VRAM\n"); mc->real_vram_size = 0xFFC0000000ULL; mc->mc_vram_size = 0xFFC0000000ULL; } - 
amdgpu_vram_location(adev, &adev->mc, 0); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_vram_location(adev, &adev->mc, base); + amdgpu_gart_location(adev, mc); } static void gmc_v6_0_mc_program(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; - u32 tmp; int i, j; /* Initialize HDP */ @@ -254,16 +245,23 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (adev->mode_info.num_crtc) - amdgpu_display_set_vga_render_state(adev, false); - - gmc_v6_0_mc_stop(adev, &save); - if (gmc_v6_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - WREG32(mmVGA_HDP_CONTROL, VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK); + if (adev->mode_info.num_crtc) { + u32 tmp; + + /* Lockout access through VGA aperture*/ + tmp = RREG32(mmVGA_HDP_CONTROL); + tmp |= VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK; + WREG32(mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32(mmVGA_RENDER_CONTROL); + tmp &= ~VGA_VSTATUS_CNTL; + WREG32(mmVGA_RENDER_CONTROL, tmp); + } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->mc.vram_start >> 12); @@ -271,13 +269,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) adev->mc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); - tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); - WREG32(mmMC_VM_FB_LOCATION, tmp); - /* XXX double check these! */ - WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); - WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); - WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); @@ -285,7 +276,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) if (gmc_v6_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - gmc_v6_0_mc_resume(adev, &save); } static int gmc_v6_0_mc_init(struct amdgpu_device *adev) @@ -342,14 +332,23 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; adev->mc.visible_vram_size = adev->mc.aper_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. 
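/*
 * gmc_v6_0_vram_gtt_location() above decodes the VRAM base from
 * MC_VM_FB_LOCATION: the low 16 bits hold the frame-buffer base in 16 MB
 * units, so "& 0xFFFF" followed by "<< 24" yields the byte address that is
 * passed to amdgpu_vram_location() instead of a hard-coded base of 0.
 */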
- */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; + /* set the gart size */ + if (amdgpu_gart_size == -1) { + switch (adev->asic_type) { + case CHIP_HAINAN: /* no MM engines */ + default: + adev->mc.gart_size = 256ULL << 20; + break; + case CHIP_VERDE: /* UVD, VCE do not support GPUVM */ + case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */ + case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */ + case CHIP_OLAND: /* UVD, VCE do not support GPUVM */ + adev->mc.gart_size = 1024ULL << 20; + break; + } + } else { + adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + } gmc_v6_0_vram_gtt_location(adev, &adev->mc); @@ -395,6 +394,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } +static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +{ + BUG_ON(addr & 0xFFFFFF0000000FFFULL); + return addr; +} + static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -473,6 +478,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) { int r, i; + u32 field; if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -500,13 +506,15 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_L2_CNTL2, VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); + + field = adev->vm_manager.fragment_size; WREG32(mmVM_L2_CNTL3, VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | - (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) | - (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); + (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) | + (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -553,7 +561,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) gmc_v6_0_gart_flush_gpu_tlb(adev, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -614,33 +622,6 @@ static void gmc_v6_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_fini(adev); } -static int gmc_v6_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) { - u64 tmp = RREG32(mmMC_VM_FB_OFFSET); - tmp <<= 22; - adev->vm_manager.vram_base_offset = tmp; - } else - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -static void gmc_v6_0_vm_fini(struct amdgpu_device *adev) -{ -} - static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client) { @@ -815,14 +796,6 @@ 
static int gmc_v6_0_early_init(void *handle) gmc_v6_0_set_gart_funcs(adev); gmc_v6_0_set_irq_funcs(adev); - if (adev->flags & AMD_IS_APU) { - adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; - } else { - u32 tmp = RREG32(mmMC_SEQ_MISC0); - tmp &= MC_SEQ_MISC0__MT__MASK; - adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp); - } - return 0; } @@ -842,6 +815,14 @@ static int gmc_v6_0_sw_init(void *handle) int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->flags & AMD_IS_APU) { + adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + } else { + u32 tmp = RREG32(mmMC_SEQ_MISC0); + tmp &= MC_SEQ_MISC0__MT__MASK; + adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp); + } + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); if (r) return r; @@ -850,11 +831,13 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; - amdgpu_vm_adjust_size(adev, 64); + amdgpu_vm_adjust_size(adev, 64, 4); adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; adev->mc.mc_mask = 0xffffffffffULL; + adev->mc.stolen_size = 256 * 1024; + adev->need_dma32 = false; dma_bits = adev->need_dma32 ? 32 : 40; r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); @@ -887,26 +870,34 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v6_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) { + u64 tmp = RREG32(mmMC_VM_FB_OFFSET); + + tmp <<= 22; + adev->vm_manager.vram_base_offset = tmp; + } else { + adev->vm_manager.vram_base_offset = 0; } - return r; + return 0; } static int gmc_v6_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v6_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v6_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -984,16 +975,10 @@ static bool gmc_v6_0_is_idle(void *handle) static int gmc_v6_0_wait_for_idle(void *handle) { unsigned i; - u32 tmp; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | - SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | - SRBM_STATUS__MCD_BUSY_MASK | - SRBM_STATUS__VMC_BUSY_MASK); - if (!tmp) + if (gmc_v6_0_is_idle(handle)) return 0; udelay(1); } @@ -1004,7 +989,6 @@ static int gmc_v6_0_wait_for_idle(void *handle) static int gmc_v6_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_mode_mc_save save; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -1020,7 +1004,7 @@ static int gmc_v6_0_soft_reset(void *handle) } if (srbm_soft_reset) { - gmc_v6_0_mc_stop(adev, &save); + gmc_v6_0_mc_stop(adev); if (gmc_v6_0_wait_for_idle(adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); } @@ -1040,7 +1024,7 @@ static int gmc_v6_0_soft_reset(void *handle) udelay(50); - gmc_v6_0_mc_resume(adev, &save); + gmc_v6_0_mc_resume(adev); udelay(50); } @@ -1146,6 +1130,7 @@ static const struct 
amdgpu_gart_funcs gmc_v6_0_gart_funcs = { .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v6_0_gart_set_pte_pde, .set_prt = gmc_v6_0_set_prt, + .get_vm_pde = gmc_v6_0_get_vm_pde, .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 2750e5c23813..eace9e7182c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "cik.h" @@ -37,6 +37,9 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#include "dce/dce_8_0_d.h" +#include "dce/dce_8_0_sh_mask.h" + #include "amdgpu_atombios.h" static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); @@ -76,14 +79,10 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev) } } -static void gmc_v7_0_mc_stop(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v7_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; - if (adev->mode_info.num_crtc) - amdgpu_display_stop_mc_access(adev, save); - gmc_v7_0_wait_for_idle((void *)adev); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); @@ -99,8 +98,7 @@ static void gmc_v7_0_mc_stop(struct amdgpu_device *adev, udelay(100); } -static void gmc_v7_0_mc_resume(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v7_0_mc_resume(struct amdgpu_device *adev) { u32 tmp; @@ -112,9 +110,6 @@ static void gmc_v7_0_mc_resume(struct amdgpu_device *adev, tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1); tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1); WREG32(mmBIF_FB_EN, tmp); - - if (adev->mode_info.num_crtc) - amdgpu_display_resume_mc_access(adev, save); } /** @@ -242,15 +237,17 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev) static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { + u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + if (mc->mc_vram_size > 0xFFC0000000ULL) { /* leave room for at least 1024M GTT */ dev_warn(adev->dev, "limiting VRAM\n"); mc->real_vram_size = 0xFFC0000000ULL; mc->mc_vram_size = 0xFFC0000000ULL; } - amdgpu_vram_location(adev, &adev->mc, 0); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_vram_location(adev, &adev->mc, base); + amdgpu_gart_location(adev, mc); } /** @@ -263,7 +260,6 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, */ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; u32 tmp; int i, j; @@ -277,13 +273,20 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (adev->mode_info.num_crtc) - amdgpu_display_set_vga_render_state(adev, false); - - gmc_v7_0_mc_stop(adev, &save); if (gmc_v7_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } + if (adev->mode_info.num_crtc) { + /* Lockout access through VGA aperture*/ + tmp = RREG32(mmVGA_HDP_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + WREG32(mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32(mmVGA_RENDER_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + WREG32(mmVGA_RENDER_CONTROL, tmp); + } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->mc.vram_start >> 12); @@ -291,20 +294,12 @@ static void 
gmc_v7_0_mc_program(struct amdgpu_device *adev) adev->mc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); - tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); - WREG32(mmMC_VM_FB_LOCATION, tmp); - /* XXX double check these! */ - WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); - WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); - WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); if (gmc_v7_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - gmc_v7_0_mc_resume(adev, &save); WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -391,14 +386,26 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) if (adev->mc.visible_vram_size > adev->mc.real_vram_size) adev->mc.visible_vram_size = adev->mc.real_vram_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. - */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; + /* set the gart size */ + if (amdgpu_gart_size == -1) { + switch (adev->asic_type) { + case CHIP_TOPAZ: /* no MM engines */ + default: + adev->mc.gart_size = 256ULL << 20; + break; +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */ + case CHIP_HAWAII: /* UVD, VCE do not support GPUVM */ + case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */ + case CHIP_KABINI: /* UVD, VCE do not support GPUVM */ + case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */ + adev->mc.gart_size = 1024ULL << 20; + break; +#endif + } + } else { + adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + } gmc_v7_0_vram_gtt_location(adev, &adev->mc); @@ -472,6 +479,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } +static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +{ + BUG_ON(addr & 0xFFFFFF0000000FFFULL); + return addr; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -569,7 +582,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) { int r, i; - u32 tmp; + u32 tmp, field; if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -599,14 +612,16 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32(mmVM_L2_CNTL2, tmp); + + field = adev->vm_manager.fragment_size; tmp = RREG32(mmVM_L2_CNTL3); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); WREG32(mmVM_L2_CNTL3, tmp); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); 
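/*
 * Editorial aside, not part of the patch above or below: in this hunk the
 * hard-coded value 4 for BANK_SELECT and L2_CACHE_BIGK_FRAGMENT_SIZE is
 * replaced by adev->vm_manager.fragment_size, and mc.gtt_start/gtt_end are
 * renamed to mc.gart_start/gart_end.  The repeated REG_SET_FIELD() calls are
 * read-modify-write updates of a named bit field inside a 32-bit register
 * value.  The helper below is a minimal sketch of that pattern with an
 * explicit mask and shift; it is an illustration only, not the kernel's
 * actual REG_SET_FIELD()/WREG32() definitions, and the macro names in the
 * usage comment are hypothetical.
 */
static inline u32 set_reg_field(u32 reg_val, u32 field_mask, u32 field_shift,
				u32 field_val)
{
	/* clear the old field bits, then insert the new value at its shift */
	return (reg_val & ~field_mask) |
	       ((field_val << field_shift) & field_mask);
}

/* usage sketch:
 *	tmp = set_reg_field(tmp, EXAMPLE_BANK_SELECT_MASK,
 *			    EXAMPLE_BANK_SELECT_SHIFT,
 *			    adev->vm_manager.fragment_size);
 */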
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -660,7 +675,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) gmc_v7_0_gart_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -724,55 +739,6 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_fini(adev); } -/* - * vm - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the amdgpu vm/hsa code. - */ -/** - * gmc_v7_0_vm_init - cik vm init callback - * - * @adev: amdgpu_device pointer - * - * Inits cik specific vm parameters (number of VMs, base of vram for - * VMIDs 1-15) (CIK). - * Returns 0 for success. - */ -static int gmc_v7_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) { - u64 tmp = RREG32(mmMC_VM_FB_OFFSET); - tmp <<= 22; - adev->vm_manager.vram_base_offset = tmp; - } else - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -/** - * gmc_v7_0_vm_fini - cik vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tear down any asic specific VM setup (CIK). - */ -static void gmc_v7_0_vm_fini(struct amdgpu_device *adev) -{ -} - /** * gmc_v7_0_vm_decode_fault - print human readable fault info * @@ -1004,7 +970,7 @@ static int gmc_v7_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. */ - amdgpu_vm_adjust_size(adev, 64); + amdgpu_vm_adjust_size(adev, 64, 4); adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; /* Set the internal MC address mask @@ -1013,6 +979,8 @@ static int gmc_v7_0_sw_init(void *handle) */ adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ + adev->mc.stolen_size = 256 * 1024; + /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. 
* IGP - can handle 40-bits @@ -1051,27 +1019,34 @@ static int gmc_v7_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v7_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) { + u64 tmp = RREG32(mmMC_VM_FB_OFFSET); + + tmp <<= 22; + adev->vm_manager.vram_base_offset = tmp; + } else { + adev->vm_manager.vram_base_offset = 0; } - return r; + return 0; } static int gmc_v7_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - amdgpu_vm_manager_fini(adev); - gmc_v7_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v7_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -1172,7 +1147,6 @@ static int gmc_v7_0_wait_for_idle(void *handle) static int gmc_v7_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_mode_mc_save save; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -1188,7 +1162,7 @@ static int gmc_v7_0_soft_reset(void *handle) } if (srbm_soft_reset) { - gmc_v7_0_mc_stop(adev, &save); + gmc_v7_0_mc_stop(adev); if (gmc_v7_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); } @@ -1209,7 +1183,7 @@ static int gmc_v7_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - gmc_v7_0_mc_resume(adev, &save); + gmc_v7_0_mc_resume(adev); udelay(50); } @@ -1335,7 +1309,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = { .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v7_0_gart_set_pte_pde, .set_prt = gmc_v7_0_set_prt, - .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags + .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, + .get_vm_pde = gmc_v7_0_get_vm_pde }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index f56b4089ee9f..3b3326daf32b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "gmc_v8_0.h" #include "amdgpu_ucode.h" @@ -35,6 +35,9 @@ #include "oss/oss_3_0_d.h" #include "oss/oss_3_0_sh_mask.h" +#include "dce/dce_10_0_d.h" +#include "dce/dce_10_0_sh_mask.h" + #include "vid.h" #include "vi.h" @@ -161,14 +164,10 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) } } -static void gmc_v8_0_mc_stop(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v8_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; - if (adev->mode_info.num_crtc) - amdgpu_display_stop_mc_access(adev, save); - gmc_v8_0_wait_for_idle(adev); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); @@ -184,8 +183,7 @@ static void gmc_v8_0_mc_stop(struct amdgpu_device *adev, udelay(100); } -static void gmc_v8_0_mc_resume(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v8_0_mc_resume(struct amdgpu_device *adev) { u32 tmp; @@ -197,9 +195,6 @@ 
static void gmc_v8_0_mc_resume(struct amdgpu_device *adev, tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1); tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1); WREG32(mmBIF_FB_EN, tmp); - - if (adev->mode_info.num_crtc) - amdgpu_display_resume_mc_access(adev, save); } /** @@ -404,15 +399,20 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev) static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { + u64 base = 0; + + if (!amdgpu_sriov_vf(adev)) + base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + if (mc->mc_vram_size > 0xFFC0000000ULL) { /* leave room for at least 1024M GTT */ dev_warn(adev->dev, "limiting VRAM\n"); mc->real_vram_size = 0xFFC0000000ULL; mc->mc_vram_size = 0xFFC0000000ULL; } - amdgpu_vram_location(adev, &adev->mc, 0); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_vram_location(adev, &adev->mc, base); + amdgpu_gart_location(adev, mc); } /** @@ -425,7 +425,6 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, */ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; u32 tmp; int i, j; @@ -439,13 +438,20 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (adev->mode_info.num_crtc) - amdgpu_display_set_vga_render_state(adev, false); - - gmc_v8_0_mc_stop(adev, &save); if (gmc_v8_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } + if (adev->mode_info.num_crtc) { + /* Lockout access through VGA aperture*/ + tmp = RREG32(mmVGA_HDP_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + WREG32(mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32(mmVGA_RENDER_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + WREG32(mmVGA_RENDER_CONTROL, tmp); + } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->mc.vram_start >> 12); @@ -453,20 +459,23 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) adev->mc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); - tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); - WREG32(mmMC_VM_FB_LOCATION, tmp); - /* XXX double check these! */ - WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); - WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); - WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); + + if (amdgpu_sriov_vf(adev)) { + tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; + tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); + WREG32(mmMC_VM_FB_LOCATION, tmp); + /* XXX double check these! */ + WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); + WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); + WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); + } + WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); if (gmc_v8_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - gmc_v8_0_mc_resume(adev, &save); WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -553,14 +562,25 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) if (adev->mc.visible_vram_size > adev->mc.real_vram_size) adev->mc.visible_vram_size = adev->mc.real_vram_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. 
- */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; + /* set the gart size */ + if (amdgpu_gart_size == -1) { + switch (adev->asic_type) { + case CHIP_POLARIS11: /* all engines support GPUVM */ + case CHIP_POLARIS10: /* all engines support GPUVM */ + case CHIP_POLARIS12: /* all engines support GPUVM */ + default: + adev->mc.gart_size = 256ULL << 20; + break; + case CHIP_TONGA: /* UVD, VCE do not support GPUVM */ + case CHIP_FIJI: /* UVD, VCE do not support GPUVM */ + case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */ + case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */ + adev->mc.gart_size = 1024ULL << 20; + break; + } + } else { + adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + } gmc_v8_0_vram_gtt_location(adev, &adev->mc); @@ -656,6 +676,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } +static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +{ + BUG_ON(addr & 0xFFFFFF0000000FFFULL); + return addr; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -755,7 +781,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) { int r, i; - u32 tmp; + u32 tmp, field; if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -786,10 +812,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32(mmVM_L2_CNTL2, tmp); + + field = adev->vm_manager.fragment_size; tmp = RREG32(mmVM_L2_CNTL3); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); WREG32(mmVM_L2_CNTL3, tmp); /* XXX: set to enable PTE/PDE in system memory */ tmp = RREG32(mmVM_L2_CNTL4); @@ -807,8 +835,8 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0); WREG32(mmVM_L2_CNTL4, tmp); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -863,7 +891,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) gmc_v8_0_gart_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -927,55 +955,6 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_fini(adev); } -/* - * vm - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the amdgpu vm/hsa code. 
- */ -/** - * gmc_v8_0_vm_init - cik vm init callback - * - * @adev: amdgpu_device pointer - * - * Inits cik specific vm parameters (number of VMs, base of vram for - * VMIDs 1-15) (CIK). - * Returns 0 for success. - */ -static int gmc_v8_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) { - u64 tmp = RREG32(mmMC_VM_FB_OFFSET); - tmp <<= 22; - adev->vm_manager.vram_base_offset = tmp; - } else - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -/** - * gmc_v8_0_vm_fini - cik vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tear down any asic specific VM setup (CIK). - */ -static void gmc_v8_0_vm_fini(struct amdgpu_device *adev) -{ -} - /** * gmc_v8_0_vm_decode_fault - print human readable fault info * @@ -1088,7 +1067,7 @@ static int gmc_v8_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. */ - amdgpu_vm_adjust_size(adev, 64); + amdgpu_vm_adjust_size(adev, 64, 4); adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; /* Set the internal MC address mask @@ -1097,6 +1076,8 @@ static int gmc_v8_0_sw_init(void *handle) */ adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ + adev->mc.stolen_size = 256 * 1024; + /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. * IGP - can handle 40-bits @@ -1135,27 +1116,34 @@ static int gmc_v8_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v8_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) { + u64 tmp = RREG32(mmMC_VM_FB_OFFSET); + + tmp <<= 22; + adev->vm_manager.vram_base_offset = tmp; + } else { + adev->vm_manager.vram_base_offset = 0; } - return r; + return 0; } static int gmc_v8_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - amdgpu_vm_manager_fini(adev); - gmc_v8_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v8_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -1294,7 +1282,7 @@ static int gmc_v8_0_pre_soft_reset(void *handle) if (!adev->mc.srbm_soft_reset) return 0; - gmc_v8_0_mc_stop(adev, &adev->mc.save); + gmc_v8_0_mc_stop(adev); if (gmc_v8_0_wait_for_idle(adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); } @@ -1340,7 +1328,7 @@ static int gmc_v8_0_post_soft_reset(void *handle) if (!adev->mc.srbm_soft_reset) return 0; - gmc_v8_0_mc_resume(adev, &adev->mc.save); + gmc_v8_0_mc_resume(adev); return 0; } @@ -1654,7 +1642,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = { .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v8_0_gart_set_pte_pde, .set_prt = gmc_v8_0_set_prt, - .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags + .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, + 
.get_vm_pde = gmc_v8_0_get_vm_pde }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f936332a069d..d04d0b123212 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -23,16 +23,20 @@ #include <linux/firmware.h> #include "amdgpu.h" #include "gmc_v9_0.h" +#include "amdgpu_atomfirmware.h" #include "vega10/soc15ip.h" #include "vega10/HDP/hdp_4_0_offset.h" #include "vega10/HDP/hdp_4_0_sh_mask.h" #include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/DC/dce_12_0_offset.h" +#include "vega10/DC/dce_12_0_sh_mask.h" #include "vega10/vega10_enum.h" #include "soc15_common.h" #include "nbio_v6_1.h" +#include "nbio_v7_0.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" @@ -215,7 +219,10 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, unsigned i, j; /* flush hdp cache */ - nbio_v6_1_hdp_flush(adev); + if (adev->flags & AMD_IS_APU) + nbio_v7_0_hdp_flush(adev); + else + nbio_v6_1_hdp_flush(adev); spin_lock(&adev->mc.invalidate_lock); @@ -354,17 +361,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr) { - return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start; + addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start; + BUG_ON(addr & 0xFFFF00000000003FULL); + return addr; } static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v9_0_gart_set_pte_pde, - .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, - .adjust_mc_addr = gmc_v9_0_adjust_mc_addr, .get_invalidate_req = gmc_v9_0_get_invalidate_req, + .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, + .get_vm_pde = gmc_v9_0_get_vm_pde }; static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) @@ -413,8 +422,12 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); amdgpu_vram_location(adev, &adev->mc, base); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_gart_location(adev, mc); + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) + adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); + else + adev->vm_manager.vram_base_offset = 0; } /** @@ -431,50 +444,54 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) u32 tmp; int chansize, numchan; - /* hbm memory channel size */ - chansize = 128; - - tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0)); - tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; - tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; - switch (tmp) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 0; - break; - case 3: - numchan = 4; - break; - case 4: - numchan = 0; - break; - case 5: - numchan = 8; - break; - case 6: - numchan = 0; - break; - case 7: - numchan = 16; - break; - case 8: - numchan = 2; - break; + adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + if (!adev->mc.vram_width) { + /* hbm memory channel size */ + chansize = 128; + + tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); + tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + switch (tmp) { + 
case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 0; + break; + case 3: + numchan = 4; + break; + case 4: + numchan = 0; + break; + case 5: + numchan = 8; + break; + case 6: + numchan = 0; + break; + case 7: + numchan = 16; + break; + case 8: + numchan = 2; + break; + } + adev->mc.vram_width = numchan * chansize; } - adev->mc.vram_width = numchan * chansize; /* Could aper size report 0 ? */ adev->mc.aper_base = pci_resource_start(adev->pdev, 0); adev->mc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->mc.mc_vram_size = - nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL; + ((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) : + nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL; adev->mc.real_vram_size = adev->mc.mc_vram_size; adev->mc.visible_vram_size = adev->mc.aper_size; @@ -482,14 +499,20 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) if (adev->mc.visible_vram_size > adev->mc.real_vram_size) adev->mc.visible_vram_size = adev->mc.real_vram_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. - */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; + /* set the gart size */ + if (amdgpu_gart_size == -1) { + switch (adev->asic_type) { + case CHIP_VEGA10: /* all engines support GPUVM */ + default: + adev->mc.gart_size = 256ULL << 20; + break; + case CHIP_RAVEN: /* DCE SG support */ + adev->mc.gart_size = 1024ULL << 20; + break; + } + } else { + adev->mc.gart_size = (u64)amdgpu_gart_size << 20; + } gmc_v9_0_vram_gtt_location(adev, &adev->mc); @@ -514,70 +537,32 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } -/* - * vm - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the amdgpu vm/hsa code. - */ -/** - * gmc_v9_0_vm_init - vm init callback - * - * @adev: amdgpu_device pointer - * - * Inits vega10 specific vm parameters (number of VMs, base of vram for - * VMIDs 1-15) (vega10). - * Returns 0 for success. - */ -static int gmc_v9_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - - /* TODO: fix num_level for APU when updating vm size and block size */ - if (adev->flags & AMD_IS_APU) - adev->vm_manager.num_level = 1; - else - adev->vm_manager.num_level = 3; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - /*XXX This value is not zero for APU*/ - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -/** - * gmc_v9_0_vm_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tear down any asic specific VM setup. 
- */ -static void gmc_v9_0_vm_fini(struct amdgpu_device *adev) -{ - return; -} - static int gmc_v9_0_sw_init(void *handle) { int r; int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gfxhub_v1_0_init(adev); + mmhub_v1_0_init(adev); + spin_lock_init(&adev->mc.invalidate_lock); - if (adev->flags & AMD_IS_APU) { + switch (adev->asic_type) { + case CHIP_RAVEN: adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; - amdgpu_vm_adjust_size(adev, 64); - } else { + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { + adev->vm_manager.vm_size = 1U << 18; + adev->vm_manager.block_size = 9; + adev->vm_manager.num_level = 3; + amdgpu_vm_set_fragment_size(adev, 9); + } else { + /* vm_size is 64GB for legacy 2-level page support */ + amdgpu_vm_adjust_size(adev, 64, 9); + adev->vm_manager.num_level = 1; + } + break; + case CHIP_VEGA10: /* XXX Don't know how to get VRAM type yet. */ adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM; /* @@ -587,11 +572,18 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->vm_manager.vm_size = 1U << 18; adev->vm_manager.block_size = 9; - DRM_INFO("vm size is %llu GB, block size is %u-bit\n", - adev->vm_manager.vm_size, - adev->vm_manager.block_size); + adev->vm_manager.num_level = 3; + amdgpu_vm_set_fragment_size(adev, 9); + break; + default: + break; } + DRM_INFO("vm size is %llu GB, block size is %u-bit,fragment size is %u-bit\n", + adev->vm_manager.vm_size, + adev->vm_manager.block_size, + adev->vm_manager.fragment_size); + /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, &adev->mc.vm_fault); @@ -609,6 +601,12 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + /* + * It needs to reserve 8M stolen memory for vega10 + * TODO: Figure out how to avoid that... + */ + adev->mc.stolen_size = 8 * 1024 * 1024; + /* set DMA mask + need_dma32 flags. * PCIE - can handle 44-bits. 
* IGP - can handle 44-bits @@ -641,15 +639,18 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v9_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } - return r; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + + amdgpu_vm_manager_init(adev); + + return 0; } /** @@ -669,11 +670,7 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - amdgpu_vm_manager_fini(adev); - gmc_v9_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v9_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -686,6 +683,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: break; + case CHIP_RAVEN: + break; default: break; } @@ -715,7 +714,19 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) return r; /* After HDP is initialized, flush HDP.*/ - nbio_v6_1_hdp_flush(adev); + if (adev->flags & AMD_IS_APU) + nbio_v7_0_hdp_flush(adev); + else + nbio_v6_1_hdp_flush(adev); + + switch (adev->asic_type) { + case CHIP_RAVEN: + mmhub_v1_0_initialize_power_gating(adev); + mmhub_v1_0_update_power_gating(adev, true); + break; + default: + break; + } r = gfxhub_v1_0_gart_enable(adev); if (r) @@ -725,12 +736,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL)); + tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; - WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp); + WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); - tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL)); - WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp); + tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); + WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) @@ -744,7 +755,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) gmc_v9_0_gart_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -758,6 +769,20 @@ static int gmc_v9_0_hw_init(void *handle) /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); + if (adev->mode_info.num_crtc) { + u32 tmp; + + /* Lockout access through VGA aperture*/ + tmp = RREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + WREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + WREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL, tmp); + } + r = gmc_v9_0_gart_enable(adev); return r; @@ -781,6 +806,12 @@ static int gmc_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) { + /* full access mode, so don't touch any GMC register */ + 
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); gmc_v9_0_gart_disable(adev); @@ -831,7 +862,16 @@ static int gmc_v9_0_soft_reset(void *handle) static int gmc_v9_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { - return 0; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return mmhub_v1_0_set_clockgating(adev, state); +} + +static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mmhub_v1_0_get_clockgating(adev, flags); } static int gmc_v9_0_set_powergating_state(void *handle, @@ -855,6 +895,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = { .soft_reset = gmc_v9_0_soft_reset, .set_clockgating_state = gmc_v9_0_set_clockgating_state, .set_powergating_state = gmc_v9_0_set_powergating_state, + .get_clockgating_state = gmc_v9_0_get_clockgating_state, }; const struct amdgpu_ip_block_version gmc_v9_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index cb622add99a7..7a0ea27ac429 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 79a52ad2c80d..3bbf2ccfca89 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/kv_smc.c b/drivers/gpu/drm/amd/amdgpu/kv_smc.c index e6b7b42acfe1..b82e33c01571 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_smc.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "kv_dpm.h" diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index dbfe48d1207a..74cb647da30e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -34,9 +34,12 @@ #include "soc15_common.h" +#define mmDAGB0_CNTL_MISC2_RV 0x008f +#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) { - u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE)); + u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; base <<= 24; @@ -44,184 +47,162 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) return base; } -int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) +static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) { - u32 tmp; - u64 value; - uint64_t addr; - u32 i; + uint64_t value; - /* Program MC. 
*/ - /* Update configuration */ - DRM_INFO("%s -- in\n", __func__); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), - adev->mc.vram_start >> 18); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), - adev->mc.vram_end >> 18); - value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + adev->vm_manager.vram_base_offset; - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), - (u32)(value >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), - (u32)(value >> 44)); + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /* valid bit */ - if (amdgpu_sriov_vf(adev)) { - /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so - vbios post doesn't program them, for SRIOV driver need to program them */ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE), - adev->mc.vram_start >> 24); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP), - adev->mc.vram_end >> 24); - } + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); + + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); +} + +static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + mmhub_v1_0_init_gart_pt_regs(adev); + + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->mc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->mc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->mc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->mc.gart_end >> 44)); +} + +static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; /* Disable AGP. */ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF); - /* GART Enable. */ + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->mc.vram_start >> 18); + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->mc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page.addr >> 12)); + WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page.addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup TLB control */ - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_ACCESS_MODE, - 3); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, - 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_APERTURE_UNMAPPED_ACCESS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - MTYPE, - MTYPE_UC);/* XXX for emulation. */ - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ATC_EN, - 1); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup L2 cache */ - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - ENABLE_L2_FRAGMENT_PROCESSING, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - L2_PDE0_CACHE_TAG_GENERATION_MODE, - 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - CONTEXT1_IDENTITY_ACCESS_MODE, - 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - IDENTITY_MODE_FRAGMENT_SIZE, - 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2)); + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2), tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), tmp); - - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4)); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - 
VMC_TAP_PDE_REQUEST_PHYSICAL, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - VMC_TAP_PTE_REQUEST_PHYSICAL, - 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp); - - /* setup context0 */ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), - (u32)(adev->mc.gtt_start >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), - (u32)(adev->mc.gtt_start >> 44)); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), - (u32)(adev->mc.gtt_end >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), - (u32)(adev->mc.gtt_end >> 44)); - - BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->mc.vram_start + - adev->vm_manager.vram_base_offset; - value &= 0x0000FFFFFFFFF000ULL; - value |= 0x1; /* valid bit */ - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), - (u32)value); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), - (u32)(value >> 32)); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), - (u32)(adev->dummy_page.addr >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), - (u32)((u64)adev->dummy_page.addr >> 44)); - - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, - 1); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); + + tmp = mmVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL4, tmp); +} - addr = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); - tmp = RREG32(addr); +static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL), tmp); - - tmp = RREG32(addr); - - /* Disable identity aperture.*/ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp); +} - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); +static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0XFFFFFFFF); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - 
mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); +static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) - + i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, @@ -243,14 +224,280 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, adev->vm_manager.block_size - 9); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, upper_32_bits(adev->vm_manager.max_pfn - 1)); } +} + +static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} + +struct pctl_data { + uint32_t index; + uint32_t data; +}; + +static const struct pctl_data pctl0_data[] = { + {0x0, 0x7a640}, + {0x9, 0x2a64a}, + {0xd, 0x2a680}, + {0x11, 0x6a684}, + {0x19, 0xea68e}, + {0x29, 0xa69e}, + {0x2b, 0x34a6c0}, + {0x61, 0x83a707}, + {0xe6, 0x8a7a4}, + {0xf0, 0x1a7b8}, + {0xf3, 0xfa7cc}, + {0x104, 0x17a7dd}, + {0x11d, 0xa7dc}, + {0x11f, 0x12a7f5}, + {0x133, 0xa808}, + {0x135, 0x12a810}, + {0x149, 0x7a82c} +}; +#define PCTL0_DATA_LEN (sizeof(pctl0_data)/sizeof(pctl0_data[0])) + +#define PCTL0_RENG_EXEC_END_PTR 0x151 +#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 +#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 + +static const struct pctl_data pctl1_data[] = { + {0x0, 0x39a000}, + {0x3b, 0x44a040}, + {0x81, 0x2a08d}, + {0x85, 0x6ba094}, + {0xf2, 0x18a100}, + {0x10c, 0x4a132}, + {0x112, 0xca141}, + {0x120, 0x2fa158}, + {0x151, 0x17a1d0}, + {0x16a, 0x1a1e9}, + {0x16d, 0x13a1ec}, + {0x182, 0x7a201}, + {0x18b, 0x3a20a}, + {0x190, 0x7a580}, + {0x199, 0xa590}, + {0x19b, 0x4a594}, + {0x1a1, 0x1a59c}, + {0x1a4, 0x7a82c}, + {0x1ad, 0xfa7cc}, + {0x1be, 0x17a7dd}, + {0x1d7, 0x12a810}, + {0x1eb, 0x4000a7e1}, + {0x1ec, 0x5000a7f5}, + {0x1ed, 0x4000a7e2}, + {0x1ee, 0x5000a7dc}, + {0x1ef, 0x4000a7e3}, + {0x1f0, 0x5000a7f6}, + {0x1f1, 0x5000a7e4} +}; +#define PCTL1_DATA_LEN (sizeof(pctl1_data)/sizeof(pctl1_data[0])) + +#define PCTL1_RENG_EXEC_END_PTR 0x1f1 +#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000 +#define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d +#define 
PCTL1_STCTRL_REG_SAVE_RANGE1_BASE 0xa580 +#define PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT 0xa59d +#define PCTL1_STCTRL_REG_SAVE_RANGE2_BASE 0xa82c +#define PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT 0xa833 + +static void mmhub_v1_0_power_gating_write_save_ranges(struct amdgpu_device *adev) +{ + uint32_t tmp = 0; + + /* PCTL0_STCTRL_REGISTER_SAVE_RANGE0 */ + tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0, + STCTRL_REGISTER_SAVE_BASE, + PCTL0_STCTRL_REG_SAVE_RANGE0_BASE); + tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0, + STCTRL_REGISTER_SAVE_LIMIT, + PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT); + WREG32_SOC15(MMHUB, 0, mmPCTL0_STCTRL_REGISTER_SAVE_RANGE0, tmp); + + /* PCTL1_STCTRL_REGISTER_SAVE_RANGE0 */ + tmp = 0; + tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0, + STCTRL_REGISTER_SAVE_BASE, + PCTL1_STCTRL_REG_SAVE_RANGE0_BASE); + tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0, + STCTRL_REGISTER_SAVE_LIMIT, + PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT); + WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE0, tmp); + + /* PCTL1_STCTRL_REGISTER_SAVE_RANGE1 */ + tmp = 0; + tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1, + STCTRL_REGISTER_SAVE_BASE, + PCTL1_STCTRL_REG_SAVE_RANGE1_BASE); + tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1, + STCTRL_REGISTER_SAVE_LIMIT, + PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT); + WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE1, tmp); + + /* PCTL1_STCTRL_REGISTER_SAVE_RANGE2 */ + tmp = 0; + tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2, + STCTRL_REGISTER_SAVE_BASE, + PCTL1_STCTRL_REG_SAVE_RANGE2_BASE); + tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2, + STCTRL_REGISTER_SAVE_LIMIT, + PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT); + WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE2, tmp); +} + +void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) +{ + uint32_t pctl0_misc = 0; + uint32_t pctl0_reng_execute = 0; + uint32_t pctl1_misc = 0; + uint32_t pctl1_reng_execute = 0; + int i = 0; + + if (amdgpu_sriov_vf(adev)) + return; + + pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC); + pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); + pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); + pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); + + /* Light sleep must be disabled before writing to pctl0 registers */ + pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); + + /* Write data used to access ram of register engine */ + for (i = 0; i < PCTL0_DATA_LEN; i++) { + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_INDEX, + pctl0_data[i].index); + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_DATA, + pctl0_data[i].data); + } + + /* Set the reng execute end ptr for pctl0 */ + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_END_PTR, + PCTL0_RENG_EXEC_END_PTR); + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + + /* Light sleep must be disabled before writing to pctl1 registers */ + pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); + + /* Write data used to access ram of register engine */ + for (i = 0; i < PCTL1_DATA_LEN; i++) { + WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_INDEX, + pctl1_data[i].index); + WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_DATA, + pctl1_data[i].data); + } + + /* Set the reng execute end ptr for pctl1 */ + pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, + PCTL1_RENG_EXECUTE, + 
RENG_EXECUTE_END_PTR, + PCTL1_RENG_EXEC_END_PTR); + WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); + + mmhub_v1_0_power_gating_write_save_ranges(adev); + + /* Re-enable light sleep */ + pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); + pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); +} + +void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t pctl0_reng_execute = 0; + uint32_t pctl1_reng_execute = 0; + + if (amdgpu_sriov_vf(adev)) + return; + + pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); + pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); + + if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_ON_PWR_UP, 1); + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_ON_REG_UPDATE, 1); + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + + pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, + PCTL1_RENG_EXECUTE, + RENG_EXECUTE_ON_PWR_UP, 1); + pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, + PCTL1_RENG_EXECUTE, + RENG_EXECUTE_ON_REG_UPDATE, 1); + WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); + + } else { + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_ON_PWR_UP, 0); + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_ON_REG_UPDATE, 0); + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + + pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, + PCTL1_RENG_EXECUTE, + RENG_EXECUTE_ON_PWR_UP, 0); + pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, + PCTL1_RENG_EXECUTE, + RENG_EXECUTE_ON_REG_UPDATE, 0); + WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); + } +} + +int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE, + adev->mc.vram_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP, + adev->mc.vram_end >> 24); + } + + /* GART Enable. 
*/ + mmhub_v1_0_init_gart_aperture_regs(adev); + mmhub_v1_0_init_system_aperture_regs(adev); + mmhub_v1_0_init_tlb_regs(adev); + mmhub_v1_0_init_cache_regs(adev); + + mmhub_v1_0_enable_system_domain(adev); + mmhub_v1_0_disable_identity_aperture(adev); + mmhub_v1_0_setup_vmid_config(adev); + mmhub_v1_0_program_invalidation(adev); return 0; } @@ -262,22 +509,22 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0); /* Setup TLB control */ - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); } /** @@ -289,7 +536,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, @@ -314,22 +561,11 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } -static int mmhub_v1_0_early_init(void *handle) +void mmhub_v1_0_init(struct amdgpu_device *adev) { - return 0; -} - -static int mmhub_v1_0_late_init(void *handle) -{ - return 0; -} - -static int mmhub_v1_0_sw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; hub->ctx0_ptb_addr_lo32 = @@ -349,69 +585,20 @@ static int mmhub_v1_0_sw_init(void *handle) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); - return 0; -} - -static int mmhub_v1_0_sw_fini(void *handle) -{ - return 0; -} - -static int mmhub_v1_0_hw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - unsigned i; - - for (i = 0; i < 18; ++i) { - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + - 2 * i, 0xffffffff); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + - 2 * i, 0x1f); - } - - return 0; -} - -static int mmhub_v1_0_hw_fini(void *handle) -{ - return 0; -} - -static int mmhub_v1_0_suspend(void *handle) -{ - return 0; -} - -static int mmhub_v1_0_resume(void *handle) -{ - return 0; -} - -static bool mmhub_v1_0_is_idle(void *handle) -{ - return true; -} - -static int mmhub_v1_0_wait_for_idle(void *handle) 
-{ - return 0; -} - -static int mmhub_v1_0_soft_reset(void *handle) -{ - return 0; } static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { - uint32_t def, data, def1, data1, def2, data2; + uint32_t def, data, def1, data1, def2 = 0, data2 = 0; - def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); - def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2)); - def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2)); + def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); + + if (adev->asic_type != CHIP_RAVEN) { + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2); + } else + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { data |= ATC_L2_MISC_CG__ENABLE_MASK; @@ -423,12 +610,13 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); - data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + if (adev->asic_type != CHIP_RAVEN) + data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } else { data &= ~ATC_L2_MISC_CG__ENABLE_MASK; @@ -439,22 +627,27 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); - data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + if (adev->asic_type != CHIP_RAVEN) + data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } if (def != data) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); + WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); - if (def1 != data1) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1); + if (def1 != data1) { + if (adev->asic_type != CHIP_RAVEN) + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); + else + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV, data1); + } - if (def2 != data2) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2); + if (adev->asic_type != CHIP_RAVEN && def2 != data2) + WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2); } static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -462,7 +655,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -470,7 +663,7 @@ static void 
athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); } static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, @@ -478,7 +671,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade { uint32_t def, data; - def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; @@ -486,7 +679,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); + WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); } static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, @@ -494,7 +687,7 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -503,19 +696,18 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; if(def != data) - WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); } -static int mmhub_v1_0_set_clockgating_state(void *handle, - enum amd_clockgating_state state) +int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) return 0; switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN: mmhub_v1_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); athub_update_medium_grain_clock_gating(adev, @@ -532,54 +724,20 @@ static int mmhub_v1_0_set_clockgating_state(void *handle, return 0; } -static void mmhub_v1_0_get_clockgating_state(void *handle, u32 *flags) +void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; if (amdgpu_sriov_vf(adev)) *flags = 0; /* AMD_CG_SUPPORT_MC_MGCG */ - data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_MGCG; /* AMD_CG_SUPPORT_MC_LS */ - data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_LS; } - -static int mmhub_v1_0_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - return 0; -} - -const struct amd_ip_funcs mmhub_v1_0_ip_funcs = { - .name = "mmhub_v1_0", - .early_init = mmhub_v1_0_early_init, - .late_init = mmhub_v1_0_late_init, - .sw_init = mmhub_v1_0_sw_init, - .sw_fini = mmhub_v1_0_sw_fini, - .hw_init = mmhub_v1_0_hw_init, - .hw_fini = mmhub_v1_0_hw_fini, - .suspend = mmhub_v1_0_suspend, - .resume = mmhub_v1_0_resume, - .is_idle = mmhub_v1_0_is_idle, - .wait_for_idle = mmhub_v1_0_wait_for_idle, - .soft_reset = mmhub_v1_0_soft_reset, - .set_clockgating_state = mmhub_v1_0_set_clockgating_state, - .set_powergating_state = mmhub_v1_0_set_powergating_state, - .get_clockgating_state = mmhub_v1_0_get_clockgating_state, -}; - -const struct amdgpu_ip_block_version mmhub_v1_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_MMHUB, - .major = 1, - .minor = 0, - .rev = 0, - .funcs = &mmhub_v1_0_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index aadedf99c028..5d38229baf69 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -28,8 +28,12 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); - -extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; -extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; +void mmhub_v1_0_init(struct amdgpu_device *adev); +int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev); +void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, + bool enable); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 1493301b6a94..2812d88a8bdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -72,21 +72,6 @@ static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val) reg); } -static void xgpu_ai_mailbox_trans_msg(struct amdgpu_device *adev, - enum idh_request req) -{ - u32 reg; - - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); - reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0, - MSGBUF_DATA, req); - WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0), - reg); - - xgpu_ai_mailbox_set_valid(adev, true); -} - static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev, enum 
idh_event event) { @@ -124,8 +109,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev) r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL)); @@ -141,12 +126,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) r = xgpu_ai_mailbox_rcv_msg(adev, event); while (r) { if (timeout <= 0) { - pr_err("Doesn't get ack from pf.\n"); + pr_err("Doesn't get msg:%d from pf.\n", event); r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; r = xgpu_ai_mailbox_rcv_msg(adev, event); } @@ -154,28 +139,50 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) return r; } - -static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, - enum idh_request req) -{ +static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) { + u32 reg; int r; - xgpu_ai_mailbox_trans_msg(adev, req); + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); + reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0, + MSGBUF_DATA, req); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0), + reg); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1), + data1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2), + data2); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3), + data3); + + xgpu_ai_mailbox_set_valid(adev, true); /* start to poll ack */ r = xgpu_ai_poll_ack(adev); if (r) - return r; + pr_err("Doesn't get ack from pf, continue\n"); xgpu_ai_mailbox_set_valid(adev, false); +} + +static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + int r; + + xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0); /* start to check msg if request is idh_req_gpu_init_access */ if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_FINI_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); - if (r) + if (r) { + pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); return r; + } } return 0; @@ -241,7 +248,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_sriov_gpu_reset(adev, false); + amdgpu_sriov_gpu_reset(adev, NULL); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -264,12 +271,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, { int r; - /* see what event we get */ - r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); + /* trigger gpu-reset by hypervisor only if TDR disbaled */ + if (amdgpu_lockup_timeout == 0) { + /* see what event we get */ + r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); - /* only handle FLR_NOTIFY now */ - if (!r) - schedule_work(&adev->virt.flr_work); + /* only handle FLR_NOTIFY now */ + if (!r) + schedule_work(&adev->virt.flr_work); + } return 0; } @@ -296,11 +306,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev) { int r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); if (r) { 
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); return r; @@ -337,4 +347,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, .reset_gpu = xgpu_ai_request_reset, + .trans_msg = xgpu_ai_mailbox_trans_msg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 9aefc44d2c34..1e91b9a1c591 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -31,7 +31,9 @@ enum idh_request { IDH_REL_GPU_INIT_ACCESS, IDH_REQ_GPU_FINI_ACCESS, IDH_REL_GPU_FINI_ACCESS, - IDH_REQ_GPU_RESET_ACCESS + IDH_REQ_GPU_RESET_ACCESS, + + IDH_LOG_VF_ERROR = 200, }; enum idh_event { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 7bdc51b02326..c25a831f94ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); } @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event) r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; r = xgpu_vi_mailbox_rcv_msg(adev, event); } @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, request == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); if (r) - return r; + pr_err("Doesn't get ack from pf, continue\n"); } return 0; @@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_sriov_gpu_reset(adev, false); + amdgpu_sriov_gpu_reset(adev, NULL); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -537,12 +537,15 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev, { int r; - /* see what event we get */ - r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); + /* trigger gpu-reset by hypervisor only if TDR disbaled */ + if (amdgpu_lockup_timeout == 0) { + /* see what event we get */ + r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); - /* only handle FLR_NOTIFY now */ - if (!r) - schedule_work(&adev->virt.flr_work); + /* only handle FLR_NOTIFY now */ + if (!r) + schedule_work(&adev->virt.flr_work); + } return 0; } @@ -610,4 +613,5 @@ const struct amdgpu_virt_ops xgpu_vi_virt_ops = { .req_full_gpu = xgpu_vi_request_full_gpu_access, .rel_full_gpu = xgpu_vi_release_full_gpu_access, .reset_gpu = xgpu_vi_request_reset, + .trans_msg = NULL, /* Does not need to trans VF errors to host. 
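The mailbox handshakes in mxgpu_ai.c and mxgpu_vi.c above now busy-wait with mdelay(5) and count the timeout down in 5 ms steps instead of sleeping 1 ms per pass, presumably so the wait also works in contexts where sleeping is not allowed. A minimal sketch of that poll-until-ready pattern; poll_fn is a hypothetical callback standing in for the driver's receive/ack checks, not a driver symbol:

#include <linux/delay.h>
#include <linux/errno.h>

/* poll_fn() returns 0 once the expected message or ack bit is present. */
static int xgpu_poll_until_ready(void *ctx, int (*poll_fn)(void *ctx),
				 int timeout_ms)
{
	while (poll_fn(ctx)) {
		if (timeout_ms <= 0)
			return -ETIME;
		mdelay(5);		/* busy-wait; no sleeping here */
		timeout_ms -= 5;
	}
	return 0;
}

The error paths differ on purpose: a missing ack is only logged and the handshake continues, while a missing READY_TO_ACCESS_GPU reply still fails the request.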
*/ }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h index 2db741131bc6..c791d73d2d54 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h @@ -32,7 +32,9 @@ enum idh_request { IDH_REL_GPU_INIT_ACCESS, IDH_REQ_GPU_FINI_ACCESS, IDH_REL_GPU_FINI_ACCESS, - IDH_REQ_GPU_RESET_ACCESS + IDH_REQ_GPU_RESET_ACCESS, + + IDH_LOG_VF_ERROR = 200, }; /* VI mailbox messages data */ diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 97057f4a10de..045988b18bc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -32,10 +32,11 @@ #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 +#define smnPCIE_CONFIG_CNTL 0x11180044 u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0)); + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -46,32 +47,33 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, uint32_t idx) { - return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx); + return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); } void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, uint32_t idx, uint32_t val) { - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx, val); + WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); } void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), - BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, + BIF_FB_EN__FB_READ_EN_MASK | + BIF_FB_EN__FB_WRITE_EN_MASK); else - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), 0); + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) { - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); + WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); } u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) { - return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE)); + return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE); } static const u32 nbio_sdma_doorbell_range_reg[] = @@ -97,15 +99,7 @@ void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - u32 tmp; - - tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN)); - if (enable) - tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 1); - else - tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 0); - - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN), tmp); + WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 
1 : 0); } void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, @@ -115,23 +109,23 @@ void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, if (enable) { tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | - REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | - REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW), - lower_32_bits(adev->doorbell.base)); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH), - upper_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); } - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL), tmp); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); } void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index) { - u32 ih_doorbell_range = RREG32(SOC15_REG_OFFSET(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE)); + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -139,7 +133,7 @@ void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, } else ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_IH_DOORBELL_RANGE), ih_doorbell_range); + WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); } void nbio_v6_1_ih_control(struct amdgpu_device *adev) @@ -147,15 +141,15 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev) u32 interrupt_cntl; /* setup interrupt control */ - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL2), adev->dummy_page.addr >> 8); - interrupt_cntl = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL)); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN */ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL), interrupt_cntl); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); } void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -251,8 +245,7 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) { uint32_t reg; - reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, - mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER)); + reg = RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER); if (reg & 1) adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; @@ -264,3 +257,15 @@ void 
nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } } + +void nbio_v6_1_init_registers(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CONFIG_CNTL); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index f6f8bc045518..686e4b4d296a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -50,5 +50,6 @@ void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable); void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags); void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev); +void nbio_v6_1_init_registers(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c new file mode 100644 index 000000000000..11b70d601922 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -0,0 +1,212 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
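Most of the conversions above keep the same read-modify-write shape: read a register into def and data, update individual fields, and only issue the write when the value actually changed. A stand-alone sketch of that idiom, with made-up mask/shift constants in place of the generated register headers and a plain pointer in place of the RREG32*/WREG32* wrappers:

#include <stdint.h>

#define EXAMPLE_CNTL__SOME_FIELD_MASK	0x00000300u	/* illustrative */
#define EXAMPLE_CNTL__SOME_FIELD__SHIFT	8

static uint32_t example_set_field(uint32_t reg, uint32_t val)
{
	reg &= ~EXAMPLE_CNTL__SOME_FIELD_MASK;
	reg |= (val << EXAMPLE_CNTL__SOME_FIELD__SHIFT) &
	       EXAMPLE_CNTL__SOME_FIELD_MASK;
	return reg;
}

/* Mirror of the "if (def != data) WREG32...(...)" checks in the patch:
 * skip the MMIO write when nothing changed. */
static void example_update(volatile uint32_t *mmio_reg, uint32_t field_val)
{
	uint32_t def, data;

	def = data = *mmio_reg;		/* RREG32_SOC15()/RREG32_PCIE() in the driver */
	data = example_set_field(data, field_val);
	if (def != data)
		*mmio_reg = data;	/* WREG32_SOC15()/WREG32_PCIE() in the driver */
}

The real REG_SET_FIELD() macro does the same mask/shift arithmetic using the generated *_MASK and *__SHIFT definitions.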
+ * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v7_0.h" + +#include "vega10/soc15ip.h" +#include "raven1/NBIO/nbio_7_0_default.h" +#include "raven1/NBIO/nbio_7_0_offset.h" +#include "raven1/NBIO/nbio_7_0_sh_mask.h" +#include "vega10/vega10_enum.h" + +#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c + +u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx) +{ + return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); +} + +void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx, uint32_t val) +{ + WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); +} + +void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, + BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); +} + +void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) +{ + WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); +} + +u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE); +} + +static const u32 nbio_sdma_doorbell_range_reg[] = +{ + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE), + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) +}; + +void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index) +{ + u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); + + WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range); +} + +void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 
1 : 0); +} + +void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); +} + +static uint32_t nbio_7_0_read_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset) +{ + uint32_t data; + + WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset); + data = RREG32_SOC15(NBIO, 0, mmSYSHUB_DATA); + + return data; +} + +static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset, + uint32_t data) +{ + WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset); + WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data); +} + +void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + /* NBIF_MGCG_CTRL_LCLK */ + def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK; + else + data &= ~NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK; + + if (def != data) + WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data); + + /* SYSHUB_MGCG_CTRL_SOCCLK */ + def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK; + else + data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK; + + if (def != data) + nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK, data); + + /* SYSHUB_MGCG_CTRL_SHUBCLK */ + def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK; + else + data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK; + + if (def != data) + nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data); +} + +void nbio_v7_0_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); + /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); +} + +struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; +struct nbio_pcie_index_data nbio_v7_0_pcie_index_data; + +int nbio_v7_0_init(struct amdgpu_device *adev) +{ + nbio_v7_0_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ); + nbio_v7_0_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, 
mmGPU_HDP_FLUSH_DONE); + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK; + + nbio_v7_0_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); + nbio_v7_0_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h new file mode 100644 index 000000000000..054ff49427e6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -0,0 +1,49 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __NBIO_V7_0_H__ +#define __NBIO_V7_0_H__ + +#include "soc15_common.h" + +extern struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; +extern struct nbio_pcie_index_data nbio_v7_0_pcie_index_data; +int nbio_v7_0_init(struct amdgpu_device *adev); +u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx); +void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx, uint32_t val); +void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable); +void nbio_v7_0_hdp_flush(struct amdgpu_device *adev); +u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev); +void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index); +void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable); +void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); +void nbio_v7_0_ih_control(struct amdgpu_device *adev); +u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev); +void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c new file mode 100644 index 000000000000..f7cf994b1da2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -0,0 +1,392 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
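The MMHUB power-gating setup above programs the PCTL register-engine RAM through paired INDEX/DATA registers, and nbio_v7_0.c uses the same scheme for SYSHUB (write the offset to SYSHUB_INDEX, then access SYSHUB_DATA). A small sketch of that indirect access pattern, using hypothetical memory-mapped register pointers rather than the driver's wrappers, which also handle locking and ordering:

#include <stdint.h>

struct ind_regs {
	volatile uint32_t *index;	/* e.g. mmPCTL0_RENG_RAM_INDEX */
	volatile uint32_t *data;	/* e.g. mmPCTL0_RENG_RAM_DATA  */
};

struct ind_entry { uint32_t index; uint32_t data; };

static void ind_write(struct ind_regs *r, uint32_t offset, uint32_t val)
{
	*r->index = offset;
	*r->data = val;
}

static uint32_t ind_read(struct ind_regs *r, uint32_t offset)
{
	*r->index = offset;
	return *r->data;
}

/* Replay a save/restore table, as mmhub_v1_0_initialize_power_gating()
 * does with pctl0_data[] and pctl1_data[]. */
static void ind_write_table(struct ind_regs *r,
			    const struct ind_entry *tbl, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++)
		ind_write(r, tbl[i].index, tbl[i].data);
}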
+ * + * Author: Huang Rui + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v10_0.h" + +#include "vega10/soc15ip.h" +#include "raven1/MP/mp_10_0_offset.h" +#include "raven1/GC/gc_9_1_offset.h" +#include "raven1/SDMA0/sdma0_4_1_offset.h" + +static int +psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) +{ + switch(ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA1: + *type = GFX_FW_TYPE_SDMA1; + break; + case AMDGPU_UCODE_ID_CP_CE: + *type = GFX_FW_TYPE_CP_CE; + break; + case AMDGPU_UCODE_ID_CP_PFP: + *type = GFX_FW_TYPE_CP_PFP; + break; + case AMDGPU_UCODE_ID_CP_ME: + *type = GFX_FW_TYPE_CP_ME; + break; + case AMDGPU_UCODE_ID_CP_MEC1: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + *type = GFX_FW_TYPE_CP_MEC_ME1; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + *type = GFX_FW_TYPE_CP_MEC_ME2; + break; + case AMDGPU_UCODE_ID_RLC_G: + *type = GFX_FW_TYPE_RLC_G; + break; + case AMDGPU_UCODE_ID_SMC: + *type = GFX_FW_TYPE_SMU; + break; + case AMDGPU_UCODE_ID_UVD: + *type = GFX_FW_TYPE_UVD; + break; + case AMDGPU_UCODE_ID_VCE: + *type = GFX_FW_TYPE_VCE; + break; + case AMDGPU_UCODE_ID_MAXIMUM: + default: + return -EINVAL; + } + + return 0; +} + +int psp_v10_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct psp_firmware_header_v1_0 *hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_RAVEN: + chip_name = "raven"; + break; + default: BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); + err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.asd_fw); + if (err) + goto out; + + hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; + adev->psp.asd_fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->psp.asd_feature_version = le32_to_cpu(hdr->ucode_feature_version); + adev->psp.asd_ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes); + adev->psp.asd_start_addr = (uint8_t *)hdr + + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + + return 0; +out: + if (err) { + dev_err(adev->dev, + "psp v10.0: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + } + + return err; +} + +int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) +{ + int ret; + uint64_t fw_mem_mc_addr = ucode->mc_addr; + struct common_firmware_header *header; + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + header = (struct common_firmware_header *)ucode->fw; + + cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); + cmd->cmd.cmd_load_ip_fw.fw_size = le32_to_cpu(header->ucode_size_bytes); + + ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); + if (ret) + DRM_ERROR("Unknown firmware type\n"); + + return ret; +} + +int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + 
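psp_v10_0_prep_cmd_buf() above, like the psp_v3_1 fixes further down, splits the 64-bit firmware MC address with lower_32_bits()/upper_32_bits() rather than open-coded casts, so the high word is always taken from the full 64-bit value. A plain-C sketch of that split; the helpers and struct below are stand-ins that only mimic the cmd_load_ip_fw layout:

#include <stdint.h>

static inline uint32_t lo32(uint64_t v) { return (uint32_t)v; }
static inline uint32_t hi32(uint64_t v) { return (uint32_t)(v >> 32); }

struct fw_load_cmd {			/* hypothetical, mirrors cmd_load_ip_fw */
	uint32_t fw_phy_addr_lo;
	uint32_t fw_phy_addr_hi;
	uint32_t fw_size;
};

static void fill_fw_load_cmd(struct fw_load_cmd *cmd,
			     uint64_t fw_mc_addr, uint32_t fw_size)
{
	cmd->fw_phy_addr_lo = lo32(fw_mc_addr);	/* lower_32_bits() in the kernel */
	cmd->fw_phy_addr_hi = hi32(fw_mc_addr);	/* upper_32_bits() in the kernel */
	cmd->fw_size = fw_size;
}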
+ /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* There might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + return ret; +} + +int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + unsigned int psp_ring_reg = 0; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + /* Write the ring destroy command to C2PMSG_64 */ + psp_ring_reg = 3 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* There might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +int psp_v10_0_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + /* KM (GPCOM) prepare write pointer */ + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + /* Update KM RB frame pointer to new frame */ + if ((psp_write_ptr_reg % ring->ring_size) == 0) + write_frame = ring->ring_mem; + else + write_frame = ring->ring_mem + (psp_write_ptr_reg / (sizeof(struct psp_gfx_rb_frame) / 4)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); + write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); + write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); + write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); + write_frame->fence_value = index; + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg += sizeof(struct psp_gfx_rb_frame) / 4; + psp_write_ptr_reg = (psp_write_ptr_reg >= ring->ring_size) ? 
0 : psp_write_ptr_reg; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + + return 0; +} + +static int +psp_v10_0_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch(ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case AMDGPU_UCODE_ID_MAXIMUM: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +bool psp_v10_0_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) +{ + int err = 0; + unsigned int fw_sram_reg_val = 0; + unsigned int fw_sram_addr_reg_offset = 0; + unsigned int fw_sram_data_reg_offset = 0; + unsigned int ucode_size; + uint32_t *ucode_mem = NULL; + struct amdgpu_device *adev = psp->adev; + + err = psp_v10_0_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, + &fw_sram_data_reg_offset, ucode_type); + if (err) + return false; + + WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); + + ucode_size = ucode->ucode_size; + ucode_mem = (uint32_t *)ucode->kaddr; + while (!ucode_size) { + fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); + + if (*ucode_mem != fw_sram_reg_val) + return false; + + ucode_mem++; + /* 4 bytes */ + ucode_size -= 4; + } + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h new file mode 100644 index 000000000000..e76cde2f01f9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h @@ -0,0 +1,46 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. 
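psp_v10_0_cmd_submit() above keeps the KM ring write pointer in DWORDs: each frame occupies sizeof(struct psp_gfx_rb_frame)/4 DWORDs, the current slot is derived from the pointer, and the pointer wraps back to zero at the ring size. A simplified sketch of that bookkeeping; the frame struct and the modulo wrap here are stand-ins, not the real psp_gfx_rb_frame or the exact v10 wrap check:

#include <stdint.h>
#include <string.h>

struct rb_frame {			/* stands in for struct psp_gfx_rb_frame */
	uint32_t cmd_buf_addr_hi, cmd_buf_addr_lo;
	uint32_t fence_addr_hi, fence_addr_lo;
	uint32_t fence_value;
	uint32_t reserved[3];
};

/* Queue one frame and return the new write pointer (in DWORDs). */
static uint32_t queue_frame(struct rb_frame *ring_mem, uint32_t ring_size_dw,
			    uint32_t wptr_dw, uint64_t cmd_addr,
			    uint64_t fence_addr, uint32_t fence_value)
{
	uint32_t frame_dw = sizeof(struct rb_frame) / 4;
	struct rb_frame *slot = ring_mem + wptr_dw / frame_dw;

	memset(slot, 0, sizeof(*slot));
	slot->cmd_buf_addr_hi = (uint32_t)(cmd_addr >> 32);
	slot->cmd_buf_addr_lo = (uint32_t)cmd_addr;
	slot->fence_addr_hi   = (uint32_t)(fence_addr >> 32);
	slot->fence_addr_lo   = (uint32_t)fence_addr;
	slot->fence_value     = fence_value;

	return (wptr_dw + frame_dw) % ring_size_dw;
}

In the driver the write pointer lives in C2PMSG_67, so the firmware sees the new frame as soon as the register is written back.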
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ +#ifndef __PSP_V10_0_H__ +#define __PSP_V10_0_H__ + +#include "amdgpu_psp.h" + +extern int psp_v10_0_init_microcode(struct psp_context *psp); +extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd); +extern int psp_v10_0_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type); +extern int psp_v10_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type); +extern int psp_v10_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type); +extern int psp_v10_0_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index); +extern bool psp_v10_0_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 60a6407ba267..2a535a4b8d5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -24,7 +24,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" @@ -172,7 +172,7 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); if (sol_reg) return 0; @@ -188,10 +188,10 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); /* Provide the sys driver to bootrom */ - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 1 << 16; - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); /* there might be handshake issue with hardware which needs delay */ @@ -213,7 +213,7 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. 
*/ - sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); if (sol_reg) return 0; @@ -229,19 +229,17 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); /* Provide the PSP secure OS to bootrom */ - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 2 << 16; - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); /* there might be handshake issue with hardware which needs delay */ mdelay(20); -#if 0 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, true); -#endif return ret; } @@ -254,8 +252,8 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr; - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32); + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); @@ -299,17 +297,17 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); /* Write high address of the ring to C2PMSG_70 */ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_70), psp_ring_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); /* Write size of ring to C2PMSG_71 */ psp_ring_reg = ring->ring_size; - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_71), psp_ring_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); /* Write the ring initialization command to C2PMSG_64 */ psp_ring_reg = ring_type; psp_ring_reg = psp_ring_reg << 16; - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); /* there might be handshake issue with hardware which needs delay */ mdelay(20); @@ -332,7 +330,7 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) /* Write the ring destroy command to C2PMSG_64 */ psp_ring_reg = 3 << 16; - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); /* there might be handshake issue with hardware which needs delay */ mdelay(20); @@ -341,10 +339,10 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 0x80000000, 0x80000000, false); - if (ring->ring_mem) - amdgpu_bo_free_kernel(&adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + return ret; } @@ -361,7 +359,7 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, uint32_t 
rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67)); + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -375,15 +373,15 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32); - write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr); - write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32); - write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr); + write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); + write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); + write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); + write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67), psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -515,7 +513,7 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) uint32_t reg; reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000; - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg); - reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)); + WREG32_SOC15(NBIO, 0, mmPCIE_INDEX2, reg); + reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2); return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a69e5d4e1d2a..b1de44f22824 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -551,17 +551,53 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) { - u32 f32_cntl; + u32 f32_cntl, phase_quantum = 0; int i; + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); - if (enable) + if (enable) { f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, 1); - else + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + ATC_L1_ENABLE, 1); + if (amdgpu_sdma_phase_quantum) { + WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i], + phase_quantum); + WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i], + phase_quantum); + } + } else { f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, 0); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + ATC_L1_ENABLE, 1); + } + WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl); } } 
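The two ctx_switch_enable hunks (the SDMA v3.0 one above and the matching SDMA v4.0 one further below) encode the amdgpu_sdma_phase_quantum module parameter into the SDMA PHASE*_QUANTUM registers as a mantissa/exponent pair: the requested quantum is halved, rounding up, until it fits the VALUE field, the number of halvings becomes the UNIT field, and if even that overflows the UNIT field both fields are clamped to their maxima with a one-time warning. The following is a minimal standalone sketch of that encoding, not part of the patch; the function name encode_phase_quantum and the PHASE_QUANTUM_* field layout are illustrative placeholders, while the driver itself uses the SDMA0_PHASE0_QUANTUM masks and shifts from the generated asic_reg headers.

#include <stdio.h>

/* Illustrative field layout only -- the driver takes the real
 * SDMA0_PHASE0_QUANTUM masks/shifts from the generated register headers. */
#define PHASE_QUANTUM_VALUE_SHIFT 0
#define PHASE_QUANTUM_VALUE_MASK  (0xffu << PHASE_QUANTUM_VALUE_SHIFT)
#define PHASE_QUANTUM_UNIT_SHIFT  8
#define PHASE_QUANTUM_UNIT_MASK   (0xfu << PHASE_QUANTUM_UNIT_SHIFT)

/* Encode a context-switch quantum (in units of roughly 1K GPU clock cycles)
 * as value * 2^unit, clamped to the field widths, as the hunks do. */
static unsigned int encode_phase_quantum(unsigned int quantum)
{
	unsigned int value = quantum;
	unsigned int unit = 0;
	const unsigned int value_max =
		PHASE_QUANTUM_VALUE_MASK >> PHASE_QUANTUM_VALUE_SHIFT;
	const unsigned int unit_max =
		PHASE_QUANTUM_UNIT_MASK >> PHASE_QUANTUM_UNIT_SHIFT;

	/* halve (rounding up) until the mantissa fits the VALUE field */
	while (value > value_max) {
		value = (value + 1) >> 1;
		unit++;
	}
	/* if the exponent overflows too, clamp both fields to their maximum */
	if (unit > unit_max) {
		value = value_max;
		unit = unit_max;
		fprintf(stderr, "clamping phase quantum to %uK clock cycles\n",
			value << unit);
	}
	return (value << PHASE_QUANTUM_VALUE_SHIFT) |
	       (unit << PHASE_QUANTUM_UNIT_SHIFT);
}

int main(void)
{
	/* a small request such as 32 fits the VALUE field directly: unit stays 0 */
	printf("0x%08x\n", encode_phase_quantum(32));
	/* a large request is rounded up into value * 2^unit */
	printf("0x%08x\n", encode_phase_quantum(100000));
	return 0;
}

The same encoding is written unchanged to PHASE0 and PHASE1 (and, on SDMA v4.0, PHASE2), so a single quantum setting applies to all switching phases.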
@@ -643,8 +679,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); /* Initialize the ring buffer's read and write pointers */ + ring->wptr = 0; WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + sdma_v3_0_ring_set_wptr(ring); WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); @@ -659,9 +696,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); - ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); - doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); if (ring->use_doorbell) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ecc70a730a54..fd7c72aaafa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -35,6 +35,7 @@ #include "vega10/MMHUB/mmhub_1_0_offset.h" #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" #include "vega10/HDP/hdp_4_0_offset.h" +#include "raven1/SDMA0/sdma0_4_1_default.h" #include "soc15_common.h" #include "soc15.h" @@ -42,6 +43,10 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); + +#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L +#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); @@ -82,6 +87,26 @@ static const u32 golden_settings_sdma_vg10[] = { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 }; +static const u32 golden_settings_sdma_4_1[] = +{ + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0 +}; + +static const u32 golden_settings_sdma_rv1[] = +{ + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002 +}; + static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) { u32 base = 0; @@ -112,25 +137,19 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_vg10, (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); break; + case CHIP_RAVEN: + amdgpu_program_register_sequence(adev, + golden_settings_sdma_4_1, + (const u32)ARRAY_SIZE(golden_settings_sdma_4_1)); + amdgpu_program_register_sequence(adev, + golden_settings_sdma_rv1, + (const u32)ARRAY_SIZE(golden_settings_sdma_rv1)); + break; default: break; } } -static void sdma_v4_0_print_ucode_regs(void *handle) 
-{ - int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "VEGA10 SDMA ucode registers\n"); - for (i = 0; i < adev->sdma.num_instances; i++) { - dev_info(adev->dev, " SDMA%d_UCODE_ADDR=0x%08X\n", - i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR))); - dev_info(adev->dev, " SDMA%d_UCODE_CHECKSUM=0x%08X\n", - i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_CHECKSUM))); - } -} - /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -158,6 +177,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA10: chip_name = "vega10"; break; + case CHIP_RAVEN: + chip_name = "raven"; + break; default: BUG(); } @@ -269,6 +291,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("Setting write pointer\n"); if (ring->use_doorbell) { + u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " "lower_32_bits(ring->wptr) << 2 == 0x%08x " @@ -277,8 +301,7 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); - adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); + WRITE_ONCE(*wb, (ring->wptr << 2)); DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); @@ -350,7 +373,9 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; struct nbio_hdp_flush_reg *nbio_hf_reg; - if (ring->adev->asic_type == CHIP_VEGA10) + if (ring->adev->flags & AMD_IS_APU) + nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; + else nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; if (ring == &ring->adev->sdma.instance[0].ring) @@ -469,13 +494,45 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) { - u32 f32_cntl; + u32 f32_cntl, phase_quantum = 0; int i; + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 
1 : 0); + if (enable && amdgpu_sdma_phase_quantum) { + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); } @@ -517,12 +574,13 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, wptr_poll_cntl; u32 rb_bufsz; u32 wb_offset; u32 doorbell; u32 doorbell_offset; u32 temp; + u64 wptr_gpu_addr; int i, r; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -581,7 +639,10 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + if (adev->flags & AMD_IS_APU) + nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + else + nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); if (amdgpu_sriov_vf(adev)) sdma_v4_0_ring_set_wptr(ring); @@ -601,6 +662,19 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp); } + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + if (amdgpu_sriov_vf(adev)) + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); + else + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -628,11 +702,75 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + } return 0; } +static void +sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable) +{ + uint32_t def, data; + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) { + /* disable idle interrupt */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); + data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; + + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); + } else { + /* disable idle interrupt */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); + data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); + } +} + +static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev) +{ + uint32_t def, data; + + /* Enable HW based PG. 
*/ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); + data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK; + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); + + /* enable interrupt */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); + data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); + + /* Configure hold time to filter in-valid power on/off request. Use default right now */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); + data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK; + data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK); + /* Configure switch time for hysteresis purpose. Use default right now */ + data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK; + data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK); + if(data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); +} + +static void sdma_v4_0_init_pg(struct amdgpu_device *adev) +{ + if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA)) + return; + + switch (adev->asic_type) { + case CHIP_RAVEN: + sdma_v4_1_init_power_gating(adev); + sdma_v4_1_update_power_gating(adev, true); + break; + default: + break; + } +} + /** * sdma_v4_0_rlc_resume - setup and start the async dma engines * @@ -643,7 +781,8 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) */ static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev) { - /* XXX todo */ + sdma_v4_0_init_pg(adev); + return 0; } @@ -660,15 +799,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) const struct sdma_firmware_header_v1_0 *hdr; const __le32 *fw_data; u32 fw_size; - u32 digest_size = 0; int i, j; /* halt the MEs */ sdma_v4_0_enable(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - uint16_t version_major; - uint16_t version_minor; if (!adev->sdma.instance[i].fw) return -EINVAL; @@ -676,31 +812,18 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) amdgpu_ucode_print_sdma_hdr(&hdr->header); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - version_major = le16_to_cpu(hdr->header.header_version_major); - version_minor = le16_to_cpu(hdr->header.header_version_minor); - - if (version_major == 1 && version_minor >= 1) { - const struct sdma_firmware_header_v1_1 *sdma_v1_1_hdr = (const struct sdma_firmware_header_v1_1 *) hdr; - digest_size = le32_to_cpu(sdma_v1_1_hdr->digest_size); - } - - fw_size -= digest_size; - fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); - for (j = 0; j < fw_size; j++) WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); } - sdma_v4_0_print_ucode_regs(adev); - return 0; } @@ -726,7 +849,6 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) } if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - DRM_INFO("Loading via direct write\n"); r = sdma_v4_0_load_microcode(adev); if (r) return r; @@ -764,8 +886,6 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - DRM_INFO("In Ring test func\n"); - r = amdgpu_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); @@ -1038,9 +1158,8 @@ static void 
sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); @@ -1074,7 +1193,10 @@ static int sdma_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->sdma.num_instances = 2; + if (adev->asic_type == CHIP_RAVEN) + adev->sdma.num_instances = 1; + else + adev->sdma.num_instances = 2; sdma_v4_0_set_ring_funcs(adev); sdma_v4_0_set_buffer_funcs(adev); @@ -1406,6 +1528,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN: sdma_v4_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); sdma_v4_0_update_medium_grain_light_sleep(adev, @@ -1420,6 +1543,17 @@ static int sdma_v4_0_set_clockgating_state(void *handle, static int sdma_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_RAVEN: + sdma_v4_1_update_power_gating(adev, + state == AMD_PG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index c0b1aabf282f..8284d5dbfc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -24,7 +24,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -971,44 +971,44 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) } static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { - {GRBM_STATUS, false}, - {GB_ADDR_CONFIG, false}, - {MC_ARB_RAMCFG, false}, - {GB_TILE_MODE0, false}, - {GB_TILE_MODE1, false}, - {GB_TILE_MODE2, false}, - {GB_TILE_MODE3, false}, - {GB_TILE_MODE4, false}, - {GB_TILE_MODE5, false}, - {GB_TILE_MODE6, false}, - {GB_TILE_MODE7, false}, - {GB_TILE_MODE8, false}, - {GB_TILE_MODE9, false}, - {GB_TILE_MODE10, false}, - {GB_TILE_MODE11, false}, - {GB_TILE_MODE12, false}, - {GB_TILE_MODE13, false}, - {GB_TILE_MODE14, false}, - {GB_TILE_MODE15, false}, - {GB_TILE_MODE16, false}, - {GB_TILE_MODE17, false}, - {GB_TILE_MODE18, false}, - {GB_TILE_MODE19, false}, - {GB_TILE_MODE20, false}, - {GB_TILE_MODE21, false}, - {GB_TILE_MODE22, false}, - {GB_TILE_MODE23, false}, - {GB_TILE_MODE24, false}, - {GB_TILE_MODE25, false}, - {GB_TILE_MODE26, false}, - {GB_TILE_MODE27, false}, - {GB_TILE_MODE28, false}, - {GB_TILE_MODE29, false}, - {GB_TILE_MODE30, false}, - {GB_TILE_MODE31, false}, - {CC_RB_BACKEND_DISABLE, false, true}, - {GC_USER_RB_BACKEND_DISABLE, false, true}, - {PA_SC_RASTER_CONFIG, false, true}, + {GRBM_STATUS}, + {GB_ADDR_CONFIG}, + {MC_ARB_RAMCFG}, + {GB_TILE_MODE0}, + {GB_TILE_MODE1}, + {GB_TILE_MODE2}, + {GB_TILE_MODE3}, + {GB_TILE_MODE4}, + {GB_TILE_MODE5}, + {GB_TILE_MODE6}, + {GB_TILE_MODE7}, + {GB_TILE_MODE8}, + {GB_TILE_MODE9}, + {GB_TILE_MODE10}, + {GB_TILE_MODE11}, + {GB_TILE_MODE12}, + {GB_TILE_MODE13}, + {GB_TILE_MODE14}, + {GB_TILE_MODE15}, + {GB_TILE_MODE16}, + 
{GB_TILE_MODE17}, + {GB_TILE_MODE18}, + {GB_TILE_MODE19}, + {GB_TILE_MODE20}, + {GB_TILE_MODE21}, + {GB_TILE_MODE22}, + {GB_TILE_MODE23}, + {GB_TILE_MODE24}, + {GB_TILE_MODE25}, + {GB_TILE_MODE26}, + {GB_TILE_MODE27}, + {GB_TILE_MODE28}, + {GB_TILE_MODE29}, + {GB_TILE_MODE30}, + {GB_TILE_MODE31}, + {CC_RB_BACKEND_DISABLE, true}, + {GC_USER_RB_BACKEND_DISABLE, true}, + {PA_SC_RASTER_CONFIG, true}, }; static uint32_t si_get_register_value(struct amdgpu_device *adev, @@ -1093,13 +1093,13 @@ static int si_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(si_allowed_read_registers); i++) { + bool indexed = si_allowed_read_registers[i].grbm_indexed; + if (reg_offset != si_allowed_read_registers[i].reg_offset) continue; - if (!si_allowed_read_registers[i].untouched) - *value = si_get_register_value(adev, - si_allowed_read_registers[i].grbm_indexed, - se_num, sh_num, reg_offset); + *value = si_get_register_value(adev, indexed, se_num, sh_num, + reg_offset); return 0; } return -EINVAL; @@ -1150,6 +1150,33 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) return r; } +#define mmROM_INDEX 0x2A +#define mmROM_DATA 0x2B + +static bool si_read_bios_from_rom(struct amdgpu_device *adev, + u8 *bios, u32 length_bytes) +{ + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + /* set rom index to 0 */ + WREG32(mmROM_INDEX, 0); + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(mmROM_DATA); + + return true; +} + //xxx: not implemented static int si_asic_reset(struct amdgpu_device *adev) { @@ -1206,6 +1233,7 @@ static void si_detect_hw_virtualization(struct amdgpu_device *adev) static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, + .read_bios_from_rom = &si_read_bios_from_rom, .read_register = &si_read_register, .reset = &si_asic_reset, .set_vga_state = &si_vga_set_state, @@ -1385,6 +1413,7 @@ static void si_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, pitcairn_mgcg_cgcg_init, (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init)); + break; case CHIP_VERDE: amdgpu_program_register_sequence(adev, verde_golden_registers, @@ -1409,6 +1438,7 @@ static void si_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, oland_mgcg_cgcg_init, (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init)); + break; case CHIP_HAINAN: amdgpu_program_register_sequence(adev, hainan_golden_registers, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 7c1c5d127281..d63873f3f574 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_dpm.h" @@ -2055,6 +2055,7 @@ static void si_initialize_powertune_defaults(struct amdgpu_device *adev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index e66084211c74..ce25e03a077d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "sid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/si_smc.c b/drivers/gpu/drm/amd/amdgpu/si_smc.c index 0726bc3b6f90..4a2fd8b61940 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/si_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "sid.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6b55d451ae7f..f2c3a49f73a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -23,9 +23,9 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" -#include "amdgpu_atomfirmware.h" +#include "amdgpu_atombios.h" #include "amdgpu_ih.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -57,12 +57,11 @@ #include "sdma_v4_0.h" #include "uvd_v7_0.h" #include "vce_v4_0.h" +#include "vcn_v1_0.h" #include "amdgpu_powerplay.h" #include "dce_virtual.h" #include "mxgpu_ai.h" -MODULE_FIRMWARE("amdgpu/vega10_smc.bin"); - #define mmFabricConfigAccessControl 0x0410 #define mmFabricConfigAccessControl_BASE_IDX 0 #define mmFabricConfigAccessControl_DEFAULT 0x00000000 @@ -104,10 +103,10 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) u32 r; struct nbio_pcie_index_data *nbio_pcie_id; - if (adev->asic_type == CHIP_VEGA10) - nbio_pcie_id = &nbio_v6_1_pcie_index_data; + if (adev->flags & AMD_IS_APU) + nbio_pcie_id = &nbio_v7_0_pcie_index_data; else - BUG(); + nbio_pcie_id = &nbio_v6_1_pcie_index_data; address = nbio_pcie_id->index_offset; data = nbio_pcie_id->data_offset; @@ -125,10 +124,10 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags, address, data; struct nbio_pcie_index_data *nbio_pcie_id; - if (adev->asic_type == CHIP_VEGA10) - nbio_pcie_id = &nbio_v6_1_pcie_index_data; + if (adev->flags & AMD_IS_APU) + nbio_pcie_id = &nbio_v7_0_pcie_index_data; else - BUG(); + nbio_pcie_id = &nbio_v6_1_pcie_index_data; address = nbio_pcie_id->index_offset; data = nbio_pcie_id->data_offset; @@ -197,15 +196,66 @@ static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->didt_idx_lock, flags); } +static u32 soc15_gc_cac_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg)); + r = RREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA); + spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); + return r; +} + +static void soc15_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg)); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, (v)); + spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); +} + +static u32 soc15_se_cac_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->se_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg)); + r = RREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA); + spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags); + return r; +} + +static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->se_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg)); + 
WREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA, (v)); + spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags); +} + static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - return nbio_v6_1_get_memsize(adev); + if (adev->flags & AMD_IS_APU) + return nbio_v7_0_get_memsize(adev); + else + return nbio_v6_1_get_memsize(adev); } static const u32 vega10_golden_init[] = { }; +static const u32 raven_golden_init[] = +{ +}; + static void soc15_init_golden_registers(struct amdgpu_device *adev) { /* Some of the registers might be dependent on GRBM_GFX_INDEX */ @@ -217,6 +267,11 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) vega10_golden_init, (const u32)ARRAY_SIZE(vega10_golden_init)); break; + case CHIP_RAVEN: + amdgpu_program_register_sequence(adev, + raven_golden_init, + (const u32)ARRAY_SIZE(raven_golden_init)); + break; default: break; } @@ -280,29 +335,25 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, return true; } -static struct amdgpu_allowed_register_entry vega10_allowed_read_registers[] = { - /* todo */ -}; - static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = { - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3), false}, - { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG), false}, - { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STAT), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS), false}, - { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), false}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)}, + { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG)}, + { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STAT)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)}, + { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)}, }; static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, @@ -341,41 +392,16 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 reg_offset, u32 *value) { - struct amdgpu_allowed_register_entry *asic_register_table = NULL; - struct amdgpu_allowed_register_entry *asic_register_entry; - uint32_t size, i; + uint32_t i; *value = 0; - switch (adev->asic_type) { - case 
CHIP_VEGA10: - asic_register_table = vega10_allowed_read_registers; - size = ARRAY_SIZE(vega10_allowed_read_registers); - break; - default: - return -EINVAL; - } - - if (asic_register_table) { - for (i = 0; i < size; i++) { - asic_register_entry = asic_register_table + i; - if (reg_offset != asic_register_entry->reg_offset) - continue; - if (!asic_register_entry->untouched) - *value = soc15_get_register_value(adev, - asic_register_entry->grbm_indexed, - se_num, sh_num, reg_offset); - return 0; - } - } - for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { if (reg_offset != soc15_allowed_read_registers[i].reg_offset) continue; - if (!soc15_allowed_read_registers[i].untouched) - *value = soc15_get_register_value(adev, - soc15_allowed_read_registers[i].grbm_indexed, - se_num, sh_num, reg_offset); + *value = soc15_get_register_value(adev, + soc15_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); return 0; } return -EINVAL; @@ -396,7 +422,10 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - if (nbio_v6_1_get_memsize(adev) != 0xffffffff) + u32 memsize = (adev->flags & AMD_IS_APU) ? + nbio_v7_0_get_memsize(adev) : + nbio_v6_1_get_memsize(adev); + if (memsize != 0xffffffff) break; udelay(1); } @@ -405,11 +434,11 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) static int soc15_asic_reset(struct amdgpu_device *adev) { - amdgpu_atomfirmware_scratch_regs_engine_hung(adev, true); + amdgpu_atombios_scratch_regs_engine_hung(adev, true); soc15_gpu_pci_config_reset(adev); - amdgpu_atomfirmware_scratch_regs_engine_hung(adev, false); + amdgpu_atombios_scratch_regs_engine_hung(adev, false); return 0; } @@ -470,8 +499,12 @@ static void soc15_program_aspm(struct amdgpu_device *adev) static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - nbio_v6_1_enable_doorbell_aperture(adev, enable); - nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); + if (adev->flags & AMD_IS_APU) { + nbio_v7_0_enable_doorbell_aperture(adev, enable); + } else { + nbio_v6_1_enable_doorbell_aperture(adev, enable); + nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); + } } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -493,8 +526,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: amdgpu_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_ip_block_add(adev, &gfxhub_v1_0_ip_block); - amdgpu_ip_block_add(adev, &mmhub_v1_0_ip_block); amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_ip_block_add(adev, &vega10_ih_ip_block); if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) @@ -508,6 +539,18 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); break; + case CHIP_RAVEN: + amdgpu_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_ip_block_add(adev, &psp_v10_0_ip_block); + amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block); + break; default: return -EINVAL; } @@ -517,14 +560,10 @@ int 
soc15_set_ip_blocks(struct amdgpu_device *adev) static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) { - return nbio_v6_1_get_rev_id(adev); -} - - -int gmc_v9_0_mc_wait_for_idle(struct amdgpu_device *adev) -{ - /* to be implemented in MC IP*/ - return 0; + if (adev->flags & AMD_IS_APU) + return nbio_v7_0_get_rev_id(adev); + else + return nbio_v6_1_get_rev_id(adev); } static const struct amdgpu_asic_funcs soc15_asic_funcs = @@ -553,6 +592,10 @@ static int soc15_common_early_init(void *handle) adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; adev->didt_rreg = &soc15_didt_rreg; adev->didt_wreg = &soc15_didt_wreg; + adev->gc_cac_rreg = &soc15_gc_cac_rreg; + adev->gc_cac_wreg = &soc15_gc_cac_wreg; + adev->se_cac_rreg = &soc15_se_cac_rreg; + adev->se_cac_wreg = &soc15_se_cac_wreg; adev->asic_funcs = &soc15_asic_funcs; @@ -560,11 +603,6 @@ static int soc15_common_early_init(void *handle) (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) psp_enabled = true; - if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_init_setting(adev); - xgpu_ai_mailbox_set_irq_funcs(adev); - } - /* * nbio need be used for both sdma and gfx9, but only * initializes once @@ -573,6 +611,9 @@ static int soc15_common_early_init(void *handle) case CHIP_VEGA10: nbio_v6_1_init(adev); break; + case CHIP_RAVEN: + nbio_v7_0_init(adev); + break; default: return -EINVAL; } @@ -603,11 +644,40 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = 0x1; break; + case CHIP_RAVEN: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_DRM_MGCG | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_MMHUB; + adev->external_rev_id = 0x1; + break; default: /* FIXME: not supported yet */ return -EINVAL; } + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev); + xgpu_ai_mailbox_set_irq_funcs(adev); + } + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); amdgpu_get_pcie_info(adev); @@ -650,6 +720,9 @@ static int soc15_common_hw_init(void *handle) soc15_pcie_gen3_enable(adev); /* enable aspm */ soc15_program_aspm(adev); + /* setup nbio registers */ + if (!(adev->flags & AMD_IS_APU)) + nbio_v6_1_init_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); @@ -825,6 +898,20 @@ static int soc15_common_set_clockgating_state(void *handle, soc15_update_df_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; + case CHIP_RAVEN: + nbio_v7_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + nbio_v6_1_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_drm_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_drm_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_rom_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 378a46da585a..acb3cdb119f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -25,6 +25,7 @@ #define __SOC15_H__ #include "nbio_v6_1.h" +#include "nbio_v7_0.h" extern const struct amd_ip_funcs soc15_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index e8df6d820dbe..7a8e4e28abb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -63,6 +63,13 @@ struct nbio_pcie_index_data { (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ (ip##_BASE__INST##inst##_SEG4 + reg)))))) +#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ + RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset) + #define WREG32_SOC15(ip, inst, reg, value) \ WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ @@ -70,6 +77,20 @@ struct nbio_pcie_index_data { (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ (ip##_BASE__INST##inst##_SEG4 + reg))))), value) +#define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ + WREG32_NO_KIQ( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))), value) + +#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ + WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset, value) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 75403c7c8c9e..7f408f85fdb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -132,6 +132,7 @@ * 1 - pfp */ #define PACKET3_INDIRECT_BUFFER 0x3F +#define INDIRECT_BUFFER_VALID (1 << 23) #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) /* 0 - LRU * 1 - Stream @@ -249,6 +250,7 @@ #define PACKET3_SET_UCONFIG_REG 0x79 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28) #define PACKET3_SCRATCH_RAM_WRITE 0x7D #define PACKET3_SCRATCH_RAM_READ 0x7E #define PACKET3_LOAD_CONST_RAM 0x80 @@ -259,8 +261,97 @@ #define PACKET3_WAIT_ON_CE_COUNTER 0x86 #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 #define PACKET3_SWITCH_BUFFER 0x8B +#define PACKET3_FRAME_CONTROL 0x90 +# define FRAME_CMD(x) ((x) << 28) + /* + * x=0: tmz_begin + * x=1: tmz_end + */ + #define PACKET3_SET_RESOURCES 0xA0 +/* 1. header + * 2. CONTROL + * 3. QUEUE_MASK_LO [31:0] + * 4. QUEUE_MASK_HI [31:0] + * 5. GWS_MASK_LO [31:0] + * 6. GWS_MASK_HI [31:0] + * 7. OAC_MASK [15:0] + * 8. 
GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0] + */ +# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) +# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) +# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) #define PACKET3_MAP_QUEUES 0xA2 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. MQD_ADDR_LO [31:0] + * 5. MQD_ADDR_HI [31:0] + * 6. WPTR_ADDR_LO [31:0] + * 7. WPTR_ADDR_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8) +# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13) +# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) +# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) +# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) +# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) +# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2 */ +# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) +# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) +#define PACKET3_UNMAP_QUEUES 0xA3 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. CONTROL3 + * 5. CONTROL4 + * 6. CONTROL5 + */ +/* CONTROL */ +# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0) + /* 0 - PREEMPT_QUEUES + * 1 - RESET_QUEUES + * 2 - DISABLE_PROCESS_QUEUES + * 3 - PREEMPT_QUEUES_NO_UNMAP + */ +# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2a */ +# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2) +/* CONTROL3a */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2) +/* CONTROL3b */ +# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0) +/* CONTROL4 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2) +/* CONTROL5 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2) +#define PACKET3_QUERY_STATUS 0xA4 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. ADDR_LO [31:0] + * 5. ADDR_HI [31:0] + * 6. DATA_LO [31:0] + * 7. DATA_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0) +# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28) +# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30) +/* CONTROL2a */ +# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) +# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) + #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 3a5097ac2bb4..923df2c0e535 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eca8f6e01e97..23a85750edd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); } /** @@ -73,9 +73,9 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->uvd.ring_enc[0]) - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); } /** @@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); } /** @@ -107,9 +107,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) return adev->wb.wb[ring->wptr_offs]; if (ring == &adev->uvd.ring_enc[0]) - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); else - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); } /** @@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } /** @@ -145,10 +145,10 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) } if (ring == &adev->uvd.ring_enc[0]) - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } @@ -165,6 +165,9 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; + if (amdgpu_sriov_vf(adev)) + return 0; + r = amdgpu_ring_alloc(ring, 16); if (r) { DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", @@ -432,13 +435,19 @@ static int uvd_v7_0_sw_init(void *handle) return r; } - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.ring_enc[i]; sprintf(ring->name, "uvd_enc%d", i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + + /* currently only use the first enconding ring for + * sriov, so set unused location for other unused rings. 
+ */ + if (i == 0) + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + else + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); if (r) @@ -562,7 +571,13 @@ static int uvd_v7_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring = &adev->uvd.ring; - uvd_v7_0_stop(adev); + if (!amdgpu_sriov_vf(adev)) + uvd_v7_0_stop(adev); + else { + /* full access mode, so don't touch any UVD register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + } + ring->ready = false; return 0; @@ -611,46 +626,46 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) uint32_t offset; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); offset = 0; } else { - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->uvd.gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->uvd.gpu_addr)); offset = size; } - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, lower_32_bits(adev->uvd.gpu_addr + offset)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, upper_32_bits(adev->uvd.gpu_addr + offset)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), + WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), + WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), + WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, 
adev->gfx.config.gb_addr_config); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); + WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); } static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, @@ -664,29 +679,34 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, size = header->header_size + header->vce_table_size + header->uvd_table_size; /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr)); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr)); /* 2, update vmid of descriptor */ - data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); + data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID); data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID, data); /* 3, notify mmsch about the size of this descriptor */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE, size); /* 4, set resp to zero */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); + + WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); + adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0; + adev->uvd.ring_enc[0].wptr = 0; + adev->uvd.ring_enc[0].wptr_old = 0; /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); - data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP); loop = 1000; while ((data & 0x10000002) != 0x10000002) { udelay(10); - data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP); loop--; if (!loop) break; @@ -729,11 +749,9 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) init_table += header->uvd_table_offset; ring = &adev->uvd.ring; + ring->wptr = 0; size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); - /* disable clock gating */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), - ~UVD_POWER_STATUS__UVD_PG_MODE_MASK, 0); MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0xFFFFFFFF, 0x00000004); /* mc resume*/ @@ -770,12 +788,6 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), - adev->gfx.config.gb_addr_config); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), - adev->gfx.config.gb_addr_config); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), - adev->gfx.config.gb_addr_config); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); /* mc resume end*/ @@ -812,17 +824,6 @@ static int 
uvd_v7_0_sriov_start(struct amdgpu_device *adev) UVD_LMI_CTRL__REQ_MODE_MASK | 0x00100000L)); - /* disable byte swapping */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), 0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), 0); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); - /* take all subblocks out of reset, except VCPU */ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); @@ -831,15 +832,6 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), UVD_VCPU_CNTL__CLK_EN_MASK); - /* enable UMC */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); - - /* boot up the VCPU */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); - - MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); - /* enable master interrupt */ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), @@ -852,40 +844,31 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) /* force RBC into idle state */ size = order_base_2(ring->ring_size); tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); - /* set the write pointer delay */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); - - /* set the wb address */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR), - (upper_32_bits(ring->gpu_addr) >> 2)); - - /* programm the RB_BASE for ring buffer */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), - lower_32_bits(ring->gpu_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), - upper_32_bits(ring->gpu_addr)); - - ring->wptr = 0; ring = &adev->uvd.ring_enc[0]; + ring->wptr = 0; MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); + /* boot up the VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); + + /* enable UMC */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); + + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); + /* add end packet */ memcpy((void *)init_table, &end, sizeof(struct 
mmsch_v1_0_cmd_end)); table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; header->uvd_table_size = table_size; - return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table); } - return -EINVAL; /* already initializaed ? */ + return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table); } /** @@ -928,7 +911,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) mdelay(1); /* put LMI, VCPU, RBC etc... into reset */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | @@ -940,7 +923,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) mdelay(5); /* initialize UVD memory controller */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), + WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | @@ -953,23 +936,23 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) lmi_swap_cntl = 0xa; mp_swap_cntl = 0; #endif - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), lmi_swap_cntl); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), mp_swap_cntl); + WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); /* take all subblocks out of reset, except VCPU */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); mdelay(5); /* enable VCPU clock */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); /* enable UMC */ @@ -977,14 +960,14 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); /* boot up the VCPU */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); mdelay(10); for (i = 0; i < 10; ++i) { uint32_t status; for (j = 0; j < 100; ++j) { - status = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS)); + status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); if (status & 2) break; mdelay(10); @@ -1025,44 +1008,44 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); /* set the write pointer delay */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); /* set the wb address */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR), + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); /* 
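The descriptor table built above is handed straight to uvd_v7_0_mmsch_start(), whose handshake is: write the table's GPU address and size, clear VCE_MMSCH_VF_MAILBOX_RESP, zero the first encode ring's doorbell and write pointers, kick VCE_MMSCH_VF_MAILBOX_HOST, then poll RESP. The wait step reduces to a bounded poll; a minimal sketch using the same registers, with a hypothetical helper name:

/* Hypothetical helper; uvd_v7_0_mmsch_start() open-codes this loop. */
static int uvd_v7_0_mmsch_wait_resp_sketch(struct amdgpu_device *adev)
{
	uint32_t data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
	int loop = 1000;		/* roughly 10 ms at 10 us per pass */

	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
		if (--loop == 0)
			return -EBUSY;	/* MMSCH never reported completion */
	}
	return 0;
}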
programm the RB_BASE for ring buffer */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); /* Initialize the ring buffer's read and write pointers */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR), 0); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); - ring->wptr = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); ring = &adev->uvd.ring_enc[0]; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); ring = &adev->uvd.ring_enc[1]; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); return 0; } @@ -1077,7 +1060,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) static void uvd_v7_0_stop(struct amdgpu_device *adev) { /* force RBC into idle state */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0x11010101); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); /* Stall UMC and register bus before resetting VCPU */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), @@ -1086,12 +1069,12 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev) mdelay(1); /* put VCPU into reset */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); mdelay(5); /* disable VCPU clock */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0x0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); /* Unstall UMC and register bus */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, @@ -1196,7 +1179,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -1208,7 
+1191,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); @@ -1309,9 +1292,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t data0, data1, mask; unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; data1 = upper_32_bits(pd_addr); @@ -1350,9 +1332,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); @@ -1408,8 +1389,8 @@ static bool uvd_v7_0_check_soft_reset(void *handle) if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || - (RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS) & - AMDGPU_UVD_STATUS_BUSY_MASK))) + (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & + AMDGPU_UVD_STATUS_BUSY_MASK)) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); @@ -1516,9 +1497,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, data2, suvd_flags; - data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL)); - data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); - data2 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL)); + data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL); + data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); + data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); @@ -1562,18 +1543,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); data1 |= suvd_flags; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), data); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), 0); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL), data2); + WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); + WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); + WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); + WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); } static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, cgc_flags, suvd_flags; - data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE)); - data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); + data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); + data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); cgc_flags = UVD_CGC_GATE__SYS_MASK | UVD_CGC_GATE__UDEC_MASK | @@ -1605,8 +1586,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) data |= cgc_flags; data1 |= suvd_flags; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), data); - WREG32(SOC15_REG_OFFSET(UVD, 0, 
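uvd_v7_0_ring_test_ring() above keeps the classic UVD smoke test, now through the SOC15 helpers: seed UVD_CONTEXT_ID with a sentinel, submit a small packet that overwrites it, and poll until the new value lands or the timeout expires. Condensed, with error handling and the packet emission trimmed:

/* Condensed from uvd_v7_0_ring_test_ring(); the ring packet that writes
 * 0xDEADBEEF into UVD_CONTEXT_ID is omitted here.
 */
WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
/* amdgpu_ring_alloc() + PACKET0 write of 0xDEADBEEF + amdgpu_ring_commit() */
for (i = 0; i < adev->usec_timeout; i++) {
	tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID);
	if (tmp == 0xDEADBEEF)
		break;
	DRM_UDELAY(1);
}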
mmUVD_SUVD_CGC_GATE), data1); + WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); + WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); } static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) @@ -1665,7 +1646,7 @@ static int uvd_v7_0_set_powergating_state(void *handle, if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) return 0; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), UVD_POWER_STATUS__UVD_PG_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); if (state == AMD_PG_STATE_GATE) { uvd_v7_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 139f964196b4..11134d5f7443 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -173,6 +173,11 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, /* 4, set resp to zero */ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); + WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); + adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0; + adev->vce.ring[0].wptr = 0; + adev->vce.ring[0].wptr_old = 0; + /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); @@ -273,7 +278,8 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), - 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK, + VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); /* end of MC_RESUME */ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), @@ -295,11 +301,9 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; header->vce_table_size = table_size; - - return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table); } - return -EINVAL; /* already initializaed ? */ + return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table); } /** @@ -418,15 +422,19 @@ static int vce_v4_0_sw_init(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + + adev->vce.saved_bo = kmalloc(size, GFP_KERNEL); + if (!adev->vce.saved_bo) + return -ENOMEM; + hdr = (const struct common_firmware_header *)adev->vce.fw->data; adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); DRM_INFO("PSP loading VCE firmware\n"); - } - - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + } else { r = amdgpu_vce_resume(adev); if (r) return r; @@ -438,12 +446,14 @@ static int vce_v4_0_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) { /* DOORBELL only works under SRIOV */ ring->use_doorbell = true; + + /* currently only use the first encoding ring for sriov, + * so set unused location for other unused rings. 
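The SRIOV doorbell assignments here, and in the matching UVD encode-ring change earlier in this patch, move the rings from the generic AMDGPU_DOORBELL64_RING* slots to the dedicated VCE/UVD ones. The AMDGPU_DOORBELL64_* values appear to name 64-bit doorbell slots while ring->doorbell_index counts 32-bit doorbells, which is how the "* 2" and "* 2 + 1" arithmetic reads; a small sketch of that assumption:

/* Assumption, not taken from this patch: one 64-bit doorbell slot carries two
 * 32-bit doorbells, so slot * 2 is the first ring of the pair and
 * slot * 2 + 1 the second.
 */
static inline u32 doorbell64_to_ring_index(u32 slot64, bool second_of_pair)
{
	return slot64 * 2 + (second_of_pair ? 1 : 0);
}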
+ */ if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2; - else if (i == 1) - ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2; + ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2; else - ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1; + ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); if (r) @@ -465,6 +475,11 @@ static int vce_v4_0_sw_fini(void *handle) /* free MM table */ amdgpu_virt_free_mm_table(adev); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + kfree(adev->vce.saved_bo); + adev->vce.saved_bo = NULL; + } + r = amdgpu_vce_suspend(adev); if (r) return r; @@ -505,8 +520,14 @@ static int vce_v4_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - /* vce_v4_0_wait_for_idle(handle); */ - vce_v4_0_stop(adev); + if (!amdgpu_sriov_vf(adev)) { + /* vce_v4_0_wait_for_idle(handle); */ + vce_v4_0_stop(adev); + } else { + /* full access mode, so don't touch any VCE register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + } + for (i = 0; i < adev->vce.num_rings; i++) adev->vce.ring[i].ready = false; @@ -515,8 +536,18 @@ static int vce_v4_0_hw_fini(void *handle) static int vce_v4_0_suspend(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + if (adev->vce.vcpu_bo == NULL) + return 0; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + void *ptr = adev->vce.cpu_addr; + + memcpy_fromio(adev->vce.saved_bo, ptr, size); + } r = vce_v4_0_hw_fini(adev); if (r) @@ -527,12 +558,22 @@ static int vce_v4_0_suspend(void *handle) static int vce_v4_0_resume(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; - r = amdgpu_vce_resume(adev); - if (r) - return r; + if (adev->vce.vcpu_bo == NULL) + return -EINVAL; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + void *ptr = adev->vce.cpu_addr; + + memcpy_toio(ptr, adev->vce.saved_bo, size); + } else { + r = amdgpu_vce_resume(adev); + if (r) + return r; + } return vce_v4_0_hw_init(adev); } @@ -919,9 +960,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); @@ -955,11 +995,13 @@ static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, { uint32_t val = 0; - if (state == AMDGPU_IRQ_STATE_ENABLE) - val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; + if (!amdgpu_sriov_vf(adev)) { + if (state == AMDGPU_IRQ_STATE_ENABLE) + val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; - WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, - ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, + ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c new file mode 100644 index 000000000000..21e7b88401e1 --- /dev/null +++ 
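When the VCE firmware is loaded through the PSP, amdgpu_vce_resume() is skipped, so the driver now keeps its own shadow copy of the VCPU BO across suspend. The lifecycle added above, condensed with error handling trimmed:

/* Condensed from the vce_v4_0 changes above (PSP load path only). */
unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

/* sw_init: allocate the shadow buffer */
adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);

/* suspend: copy the VCPU BO contents out of VRAM */
memcpy_fromio(adev->vce.saved_bo, adev->vce.cpu_addr, size);

/* resume: copy them back before hw_init */
memcpy_toio(adev->vce.cpu_addr, adev->vce.saved_bo, size);

/* sw_fini: release the shadow buffer */
kfree(adev->vce.saved_bo);
adev->vce.saved_bo = NULL;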
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -0,0 +1,1189 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "soc15d.h" +#include "soc15_common.h" + +#include "vega10/soc15ip.h" +#include "raven1/VCN/vcn_1_0_offset.h" +#include "raven1/VCN/vcn_1_0_sh_mask.h" +#include "vega10/HDP/hdp_4_0_offset.h" +#include "raven1/MMHUB/mmhub_9_1_offset.h" +#include "raven1/MMHUB/mmhub_9_1_sh_mask.h" + +static int vcn_v1_0_start(struct amdgpu_device *adev); +static int vcn_v1_0_stop(struct amdgpu_device *adev); +static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); + +/** + * vcn_v1_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v1_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->vcn.num_enc_rings = 2; + + vcn_v1_0_set_dec_ring_funcs(adev); + vcn_v1_0_set_enc_ring_funcs(adev); + vcn_v1_0_set_irq_funcs(adev); + + return 0; +} + +/** + * vcn_v1_0_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v1_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, 124, &adev->vcn.irq); + if (r) + return r; + + /* VCN ENC TRAP */ + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, i + 119, + &adev->vcn.irq); + if (r) + return r; + } + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + ring = &adev->vcn.ring_dec; + sprintf(ring->name, "vcn_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.ring_enc[i]; + sprintf(ring->name, "vcn_enc%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + if (r) + return r; + } + + return r; +} + +/** + * vcn_v1_0_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ 
+static int vcn_v1_0_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v1_0_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v1_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + int i, r; + + r = vcn_v1_0_start(adev); + if (r) + goto done; + + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.ring_enc[i]; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + } + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully.\n"); + + return r; +} + +/** + * vcn_v1_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v1_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + int r; + + r = vcn_v1_0_stop(adev); + if (r) + return r; + + ring->ready = false; + + return 0; +} + +/** + * vcn_v1_0_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v1_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v1_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v1_0_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v1_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v1_0_hw_init(adev); + + return r; +} + +/** + * vcn_v1_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); + + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + size)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + size)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE); + + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, + AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40)); + + WREG32_SOC15(UVD, 0, 
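vcn_v1_0_mc_resume() above packs the firmware image, the heap and the stack/session area back to back in the VCN BO and exposes each region through one VCPU cache window. The offsets it programs reduce to the following arithmetic (same symbols as the function; the *_SIZE constants are assumed to come from amdgpu_vcn.h):

/* Address layout behind the three VCPU_CACHE windows programmed above. */
uint64_t base  = adev->vcn.gpu_addr;
uint32_t fw_sz = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);

uint64_t win0 = base;                                 /* firmware image      */
uint64_t win1 = base + fw_sz;                         /* heap                */
uint64_t win2 = base + fw_sz + AMDGPU_VCN_HEAP_SIZE;  /* stack + 40 sessions */
uint32_t win2_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_SESSION_SIZE * 40;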
mmUVD_UDEC_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); +} + +/** + * vcn_v1_0_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Disable clock gating for VCN block + */ +static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) +{ + uint32_t data; + + /* JPEG disable CGC */ + data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); + + if (sw) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK); + WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data); + + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + if (sw) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__SCPU_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | 
UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); +} + +/** + * vcn_v1_0_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Enable clock gating for VCN block + */ +static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) +{ + uint32_t data = 0; + + /* enable JPEG CGC */ + data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); + if (sw) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK); + WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data); + + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + if (sw) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); +} + +/** + * vcn_v1_0_start - start VCN block + * + * @adev: amdgpu_device pointer + * + * Setup and start the VCN block + */ +static int vcn_v1_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + uint32_t rb_bufsz, tmp; + uint32_t lmi_swap_cntl; + int i, j, r; + + /* disable byte swapping */ + 
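Both clock-gating helpers above share one shape: read the CGC control register, set or clear the DYN_CLOCK_MODE bit for software-dynamic gating, reprogram the gate-delay and clock-off-delay fields, write the register back, then adjust the per-engine gate masks. The core of the UVD case, condensed:

/* Core read-modify-write from vcn_v1_0_disable/enable_clock_gating(). */
uint32_t data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);

if (sw_dynamic_gating)		/* the 'sw' parameter of the helpers above */
	data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
	data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;

data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);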
lmi_swap_cntl = 0; + + vcn_v1_0_mc_resume(adev); + + /* disable clock gating */ + vcn_v1_0_disable_clock_gating(adev, true); + + /* disable interupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + mdelay(5); + + /* initialize VCN memory controller */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, + (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; +#endif + WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); + + /* take all subblocks out of reset, except VCPU */ + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); + + /* enable VCPU clock */ + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, + UVD_VCPU_CNTL__CLK_EN_MASK); + + /* enable UMC */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* boot up the VCPU */ + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); + mdelay(10); + + for (i = 0; i < 10; ++i) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); + + /* clear the bit 4 of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + 
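The stall/unstall and soft-reset sequencing above leans on WREG32_P for masked updates. Read it as "preserve the bits in mask, take the rest from val", so WREG32_P(reg, v, ~FIELD_MASK) touches only FIELD_MASK; a rough equivalent (the real macro lives in amdgpu.h):

/* Approximate behaviour of WREG32_P(reg, val, mask); sketch only. */
static inline void wreg32_p_sketch(struct amdgpu_device *adev,
				   uint32_t reg, uint32_t val, uint32_t mask)
{
	uint32_t tmp = RREG32(reg);

	tmp &= mask;		/* bits the caller wants left alone */
	tmp |= val & ~mask;	/* new contents for everything else */
	WREG32(reg, tmp);
}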
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, + ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); + + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + + return 0; +} + +/** + * vcn_v1_0_stop - stop VCN block + * + * @adev: amdgpu_device pointer + * + * stop the VCN block + */ +static int vcn_v1_0_stop(struct amdgpu_device *adev) +{ + /* force RBC into idle state */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); + + /* put VCPU into reset */ + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); + + /* disable VCPU clock */ + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); + + /* Unstall UMC and register bus */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* enable clock gating */ + vcn_v1_0_enable_clock_gating(adev, true); + + return 0; +} + +static int vcn_v1_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + /* needed for driver unload*/ + return 0; +} + +/** + * vcn_v1_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v1_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v1_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v1_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v1_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); 
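The UVD_RBC_RB_CNTL value written just above is assembled with REG_SET_FIELD, which masks a field value into place using the generated __SHIFT/_MASK constants. Expanding the RB_BUFSZ step by hand, under the usual reading of that macro:

/* Hand expansion of REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
 * see amdgpu.h for the real macro.
 */
uint32_t rb_bufsz = order_base_2(ring->ring_size);
uint32_t tmp = 0;

tmp &= ~UVD_RBC_RB_CNTL__RB_BUFSZ_MASK;
tmp |= (rb_bufsz << UVD_RBC_RB_CNTL__RB_BUFSZ__SHIFT) &
	UVD_RBC_RB_CNTL__RB_BUFSZ_MASK;
/* RB_BLKSZ, RB_NO_FETCH, RB_WPTR_POLL_EN, RB_NO_UPDATE and RB_RPTR_WR_EN are
 * folded in the same way before the single write of tmp to mmUVD_RBC_RB_CNTL.
 */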
+} + +/** + * vcn_v1_0_dec_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1); +} + +/** + * vcn_v1_0_dec_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1); +} + +/** + * vcn_v1_0_dec_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, addr & 0xffffffff); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1); + + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1); +} + +/** + * vcn_v1_0_dec_ring_hdp_invalidate - emit an hdp invalidate + * + * @ring: amdgpu_ring pointer + * + * Emits an hdp invalidate. 
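Every decode-ring command in these helpers is a type-0 packet: a PACKET0 header naming a UVD register, followed by the dword(s) to store there. The header encoding is sketched below; soc15d.h holds the authoritative macro, so treat the exact bit positions as an assumption:

/* Assumed layout of the type-0 header built by PACKET0(reg, n): packet type in
 * the top bits, register offset in the low 16, extra-dword count in between.
 */
#define PACKET0_SKETCH(reg, n) \
	((0u << 30) | ((reg) & 0xFFFF) | (((n) & 0x3FFF) << 16))

/* "write one dword into UVD_GPCOM_VCPU_CMD" then becomes two ring writes:
 *	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
 *	amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);
 */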
+ */ +static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0)); + amdgpu_ring_write(ring, 1); +} + +/** + * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer + */ +static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); + amdgpu_ring_write(ring, vm_id); + + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, + uint32_t data0, uint32_t data1) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, data0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, data1); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); +} + +static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, + uint32_t data0, uint32_t data1, uint32_t mask) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, data0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, data1); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1); +} + +static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t data0, data1, mask; + unsigned eng = ring->vm_inv_eng; + + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; + + data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data1 = upper_32_bits(pd_addr); + vcn_v1_0_dec_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + vcn_v1_0_dec_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); + + /* flush TLB */ + data0 = (hub->vm_inv_eng0_req + eng) << 2; + data1 = req; + vcn_v1_0_dec_vm_reg_write(ring, data0, data1); + + /* wait for flush */ + data0 = (hub->vm_inv_eng0_ack + eng) << 2; + data1 = 1 << vm_id; + mask = 1 << vm_id; + vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); +} + +/** + * vcn_v1_0_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static 
uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.ring_enc[0]) + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); +} + + /** + * vcn_v1_0_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.ring_enc[0]) + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); +} + + /** + * vcn_v1_0_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.ring_enc[0]) + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, + lower_32_bits(ring->wptr)); + else + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, + lower_32_bits(ring->wptr)); +} + +/** + * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write enc a fence and a trap command to the ring. + */ +static void vcn_v1_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE); + amdgpu_ring_write(ring, addr); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP); +} + +static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_END); +} + +/** + * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write enc ring commands to execute the indirect buffer + */ +static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_IB); + amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vm_id, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + unsigned eng = ring->vm_inv_eng; + + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, 
VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); +} + +static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case 124: + amdgpu_fence_process(&adev->vcn.ring_dec); + break; + case 119: + amdgpu_fence_process(&adev->vcn.ring_enc[0]); + break; + case 120: + amdgpu_fence_process(&adev->vcn.ring_enc[1]); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { + .name = "vcn_v1_0", + .early_init = vcn_v1_0_early_init, + .late_init = NULL, + .sw_init = vcn_v1_0_sw_init, + .sw_fini = vcn_v1_0_sw_fini, + .hw_init = vcn_v1_0_hw_init, + .hw_fini = vcn_v1_0_hw_fini, + .suspend = vcn_v1_0_suspend, + .resume = vcn_v1_0_resume, + .is_idle = NULL /* vcn_v1_0_is_idle */, + .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */, + .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */, + .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */, + .soft_reset = NULL /* vcn_v1_0_soft_reset */, + .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, + .set_clockgating_state = vcn_v1_0_set_clockgating_state, + .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */, +}; + +static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), + .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, + .get_rptr = vcn_v1_0_dec_ring_get_rptr, + .get_wptr = vcn_v1_0_dec_ring_get_wptr, + .set_wptr = vcn_v1_0_dec_ring_set_wptr, + .emit_frame_size = + 2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */ + 34 + /* vcn_v1_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */ + .emit_ib = vcn_v1_0_dec_ring_emit_ib, + .emit_fence = vcn_v1_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush, + .emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate, + .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_start = vcn_v1_0_dec_ring_insert_start, + .insert_end = vcn_v1_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, +}; + +static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, + .get_rptr = vcn_v1_0_enc_ring_get_rptr, + .get_wptr = vcn_v1_0_enc_ring_get_wptr, + .set_wptr = vcn_v1_0_enc_ring_set_wptr, + .emit_frame_size = + 17 + /* vcn_v1_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v1_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */ + .emit_ib = vcn_v1_0_enc_ring_emit_ib, + .emit_fence = vcn_v1_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v1_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = 
amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v1_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, +}; + +static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; + DRM_INFO("VCN decode is enabled in VM mode\n"); +} + +static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; + + DRM_INFO("VCN encode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { + .set = vcn_v1_0_set_interrupt_state, + .process = vcn_v1_0_process_interrupt, +}; + +static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->uvd.irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; +} + +const struct amdgpu_ip_block_version vcn_v1_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &vcn_v1_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h new file mode 100644 index 000000000000..2a497a7a4840 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V1_0_H__ +#define __VCN_V1_0_H__ + +extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 071f56e439bb..56150e8d1ed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "soc15.h" @@ -97,7 +97,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) /* disable irqs */ vega10_ih_disable_interrupts(adev); - nbio_v6_1_ih_control(adev); + if (adev->flags & AMD_IS_APU) + nbio_v7_0_ih_control(adev); + else + nbio_v6_1_ih_control(adev); ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); /* Ring Buffer base. 
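vcn_v1_0_ip_block and the exported header above only take effect once the SoC code adds the block to the device's IP list; that hookup lives outside these two files. A hedged sketch of the registration, assuming the amdgpu_ip_block_add() helper used for the existing blocks:

/* Illustrative only: in practice this call sits in the per-ASIC IP-block
 * setup (the soc15 code for this generation), not in vcn_v1_0.c.
 */
static int add_vcn_block_sketch(struct amdgpu_device *adev)
{
	return amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block);
}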
@@ -148,7 +151,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 						 ENABLE, 0);
 	}
 	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR), ih_doorbell_rtpr);
-	nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
+	if (adev->flags & AMD_IS_APU)
+		nbio_v7_0_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
+	else
+		nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
 
 	tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL));
 	tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index b1132f5e84fc..9ff69b90df36 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -21,7 +21,7 @@
  *
  */
 #include <linux/slab.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
@@ -463,89 +463,83 @@ static void vi_detect_hw_virtualization(struct amdgpu_device *adev)
 	}
 }
 
-static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = {
-};
-
-static const struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = {
-};
-
 static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = {
-	{mmGRBM_STATUS, false},
-	{mmGRBM_STATUS2, false},
-	{mmGRBM_STATUS_SE0, false},
-	{mmGRBM_STATUS_SE1, false},
-	{mmGRBM_STATUS_SE2, false},
-	{mmGRBM_STATUS_SE3, false},
-	{mmSRBM_STATUS, false},
-	{mmSRBM_STATUS2, false},
-	{mmSRBM_STATUS3, false},
-	{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET, false},
-	{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET, false},
-	{mmCP_STAT, false},
-	{mmCP_STALLED_STAT1, false},
-	{mmCP_STALLED_STAT2, false},
-	{mmCP_STALLED_STAT3, false},
-	{mmCP_CPF_BUSY_STAT, false},
-	{mmCP_CPF_STALLED_STAT1, false},
-	{mmCP_CPF_STATUS, false},
-	{mmCP_CPC_BUSY_STAT, false},
-	{mmCP_CPC_STALLED_STAT1, false},
-	{mmCP_CPC_STATUS, false},
-	{mmGB_ADDR_CONFIG, false},
-	{mmMC_ARB_RAMCFG, false},
-	{mmGB_TILE_MODE0, false},
-	{mmGB_TILE_MODE1, false},
-	{mmGB_TILE_MODE2, false},
-	{mmGB_TILE_MODE3, false},
-	{mmGB_TILE_MODE4, false},
-	{mmGB_TILE_MODE5, false},
-	{mmGB_TILE_MODE6, false},
-	{mmGB_TILE_MODE7, false},
-	{mmGB_TILE_MODE8, false},
-	{mmGB_TILE_MODE9, false},
-	{mmGB_TILE_MODE10, false},
-	{mmGB_TILE_MODE11, false},
-	{mmGB_TILE_MODE12, false},
-	{mmGB_TILE_MODE13, false},
-	{mmGB_TILE_MODE14, false},
-	{mmGB_TILE_MODE15, false},
-	{mmGB_TILE_MODE16, false},
-	{mmGB_TILE_MODE17, false},
-	{mmGB_TILE_MODE18, false},
-	{mmGB_TILE_MODE19, false},
-	{mmGB_TILE_MODE20, false},
-	{mmGB_TILE_MODE21, false},
-	{mmGB_TILE_MODE22, false},
-	{mmGB_TILE_MODE23, false},
-	{mmGB_TILE_MODE24, false},
-	{mmGB_TILE_MODE25, false},
-	{mmGB_TILE_MODE26, false},
-	{mmGB_TILE_MODE27, false},
-	{mmGB_TILE_MODE28, false},
-	{mmGB_TILE_MODE29, false},
-	{mmGB_TILE_MODE30, false},
-	{mmGB_TILE_MODE31, false},
-	{mmGB_MACROTILE_MODE0, false},
-	{mmGB_MACROTILE_MODE1, false},
-	{mmGB_MACROTILE_MODE2, false},
-	{mmGB_MACROTILE_MODE3, false},
-	{mmGB_MACROTILE_MODE4, false},
-	{mmGB_MACROTILE_MODE5, false},
-	{mmGB_MACROTILE_MODE6, false},
-	{mmGB_MACROTILE_MODE7, false},
-	{mmGB_MACROTILE_MODE8, false},
-	{mmGB_MACROTILE_MODE9, false},
-	{mmGB_MACROTILE_MODE10, false},
-	{mmGB_MACROTILE_MODE11, false},
-	{mmGB_MACROTILE_MODE12, false},
-	{mmGB_MACROTILE_MODE13, false},
-	{mmGB_MACROTILE_MODE14, false},
-	{mmGB_MACROTILE_MODE15, false},
-	{mmCC_RB_BACKEND_DISABLE, false, true},
-	{mmGC_USER_RB_BACKEND_DISABLE, false, true},
-	{mmGB_BACKEND_MAP, false, false},
-	{mmPA_SC_RASTER_CONFIG, false, true},
-	{mmPA_SC_RASTER_CONFIG_1, false, true},
+	{mmGRBM_STATUS},
+	{mmGRBM_STATUS2},
+	{mmGRBM_STATUS_SE0},
+	{mmGRBM_STATUS_SE1},
+	{mmGRBM_STATUS_SE2},
+	{mmGRBM_STATUS_SE3},
+	{mmSRBM_STATUS},
+	{mmSRBM_STATUS2},
+	{mmSRBM_STATUS3},
+	{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+	{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+	{mmCP_STAT},
+	{mmCP_STALLED_STAT1},
+	{mmCP_STALLED_STAT2},
+	{mmCP_STALLED_STAT3},
+	{mmCP_CPF_BUSY_STAT},
+	{mmCP_CPF_STALLED_STAT1},
+	{mmCP_CPF_STATUS},
+	{mmCP_CPC_BUSY_STAT},
+	{mmCP_CPC_STALLED_STAT1},
+	{mmCP_CPC_STATUS},
+	{mmGB_ADDR_CONFIG},
+	{mmMC_ARB_RAMCFG},
+	{mmGB_TILE_MODE0},
+	{mmGB_TILE_MODE1},
+	{mmGB_TILE_MODE2},
+	{mmGB_TILE_MODE3},
+	{mmGB_TILE_MODE4},
+	{mmGB_TILE_MODE5},
+	{mmGB_TILE_MODE6},
+	{mmGB_TILE_MODE7},
+	{mmGB_TILE_MODE8},
+	{mmGB_TILE_MODE9},
+	{mmGB_TILE_MODE10},
+	{mmGB_TILE_MODE11},
+	{mmGB_TILE_MODE12},
+	{mmGB_TILE_MODE13},
+	{mmGB_TILE_MODE14},
+	{mmGB_TILE_MODE15},
+	{mmGB_TILE_MODE16},
+	{mmGB_TILE_MODE17},
+	{mmGB_TILE_MODE18},
+	{mmGB_TILE_MODE19},
+	{mmGB_TILE_MODE20},
+	{mmGB_TILE_MODE21},
+	{mmGB_TILE_MODE22},
+	{mmGB_TILE_MODE23},
+	{mmGB_TILE_MODE24},
+	{mmGB_TILE_MODE25},
+	{mmGB_TILE_MODE26},
+	{mmGB_TILE_MODE27},
+	{mmGB_TILE_MODE28},
+	{mmGB_TILE_MODE29},
+	{mmGB_TILE_MODE30},
+	{mmGB_TILE_MODE31},
+	{mmGB_MACROTILE_MODE0},
+	{mmGB_MACROTILE_MODE1},
+	{mmGB_MACROTILE_MODE2},
+	{mmGB_MACROTILE_MODE3},
+	{mmGB_MACROTILE_MODE4},
+	{mmGB_MACROTILE_MODE5},
+	{mmGB_MACROTILE_MODE6},
+	{mmGB_MACROTILE_MODE7},
+	{mmGB_MACROTILE_MODE8},
+	{mmGB_MACROTILE_MODE9},
+	{mmGB_MACROTILE_MODE10},
+	{mmGB_MACROTILE_MODE11},
+	{mmGB_MACROTILE_MODE12},
+	{mmGB_MACROTILE_MODE13},
+	{mmGB_MACROTILE_MODE14},
+	{mmGB_MACROTILE_MODE15},
+	{mmCC_RB_BACKEND_DISABLE, true},
+	{mmGC_USER_RB_BACKEND_DISABLE, true},
+	{mmGB_BACKEND_MAP, false},
+	{mmPA_SC_RASTER_CONFIG, true},
+	{mmPA_SC_RASTER_CONFIG_1, true},
 };
 
 static uint32_t vi_get_register_value(struct amdgpu_device *adev,
@@ -647,51 +641,17 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev,
 static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 			    u32 sh_num, u32 reg_offset, u32 *value)
 {
-	const struct amdgpu_allowed_register_entry *asic_register_table = NULL;
-	const struct amdgpu_allowed_register_entry *asic_register_entry;
-	uint32_t size, i;
+	uint32_t i;
 
 	*value = 0;
-	switch (adev->asic_type) {
-	case CHIP_TOPAZ:
-		asic_register_table = tonga_allowed_read_registers;
-		size = ARRAY_SIZE(tonga_allowed_read_registers);
-		break;
-	case CHIP_FIJI:
-	case CHIP_TONGA:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS12:
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-		asic_register_table = cz_allowed_read_registers;
-		size = ARRAY_SIZE(cz_allowed_read_registers);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (asic_register_table) {
-		for (i = 0; i < size; i++) {
-			asic_register_entry = asic_register_table + i;
-			if (reg_offset != asic_register_entry->reg_offset)
-				continue;
-			if (!asic_register_entry->untouched)
-				*value = vi_get_register_value(adev,
-							       asic_register_entry->grbm_indexed,
-							       se_num, sh_num, reg_offset);
-			return 0;
-		}
-	}
-
 	for (i = 0; i < ARRAY_SIZE(vi_allowed_read_registers); i++) {
+		bool indexed = vi_allowed_read_registers[i].grbm_indexed;
+
 		if (reg_offset != vi_allowed_read_registers[i].reg_offset)
 			continue;
 
-		if (!vi_allowed_read_registers[i].untouched)
-			*value = vi_get_register_value(adev,
-					vi_allowed_read_registers[i].grbm_indexed,
-					se_num, sh_num, reg_offset);
+		*value = vi_get_register_value(adev, indexed, se_num, sh_num,
+					       reg_offset);
 		return 0;
 	}
 	return -EINVAL;
@@ -934,11 +894,6 @@ static int vi_common_early_init(void *handle)
 	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
 		smc_enabled = true;
 
-	if (amdgpu_sriov_vf(adev)) {
-		amdgpu_virt_init_setting(adev);
-		xgpu_vi_mailbox_set_irq_funcs(adev);
-	}
-
 	adev->rev_id = vi_get_rev_id(adev);
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
@@ -1073,8 +1028,7 @@ static int vi_common_early_init(void *handle)
 		/* rev0 hardware requires workarounds to support PG */
 		adev->pg_flags = 0;
 		if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) {
-			adev->pg_flags |=
-				AMD_PG_SUPPORT_GFX_SMG |
+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG |
 				AMD_PG_SUPPORT_GFX_PIPELINE |
 				AMD_PG_SUPPORT_CP |
 				AMD_PG_SUPPORT_UVD |
@@ -1111,6 +1065,11 @@ static int vi_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_setting(adev);
+		xgpu_vi_mailbox_set_irq_funcs(adev);
+	}
+
 	/* vi use smc load by default */
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 5f2ab9c1609a..a6485254a169 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -361,6 +361,12 @@
 #define	PACKET3_WAIT_ON_CE_COUNTER			0x86
 #define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
 #define	PACKET3_SWITCH_BUFFER				0x8B
+#define	PACKET3_FRAME_CONTROL				0x90
+#	define FRAME_CMD(x) ((x) << 28)
+			/*
+			 * x=0: tmz_begin
+			 * x=1: tmz_end
+			 */
 #define	PACKET3_SET_RESOURCES				0xA0
 /* 1. header
  * 2. CONTROL |