summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c189
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c226
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c98
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c152
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c277
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c3
39 files changed, 1024 insertions, 576 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a5427cf4b19d..103635ab784c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -76,7 +76,7 @@
extern int amdgpu_modeset;
extern int amdgpu_vram_limit;
extern int amdgpu_vis_vram_limit;
-extern unsigned amdgpu_gart_size;
+extern int amdgpu_gart_size;
extern int amdgpu_gtt_size;
extern int amdgpu_moverate;
extern int amdgpu_benchmarking;
@@ -96,6 +96,7 @@ extern int amdgpu_bapm;
extern int amdgpu_deep_color;
extern int amdgpu_vm_size;
extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fragment_size;
extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
@@ -748,6 +749,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
struct amdgpu_fpriv {
struct amdgpu_vm vm;
struct amdgpu_bo_va *prt_va;
+ struct amdgpu_bo_va *csa_va;
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
@@ -1482,9 +1484,6 @@ struct amdgpu_device {
struct amdgpu_mman mman;
struct amdgpu_vram_scratch vram_scratch;
struct amdgpu_wb wb;
- atomic64_t vram_usage;
- atomic64_t vram_vis_usage;
- atomic64_t gtt_usage;
atomic64_t num_bytes_moved;
atomic64_t num_evictions;
atomic64_t num_vram_cpu_page_faults;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c7bcf5207d79..5432af39a674 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,14 +28,14 @@
#include <linux/module.h>
const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
int amdgpu_amdkfd_init(void)
{
int ret;
#if defined(CONFIG_HSA_AMD_MODULE)
- int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+ int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b8802a561cbd..8d689ab7e429 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
#define AMDGPU_AMDKFD_H_INCLUDED
#include <linux/types.h>
+#include <linux/mmu_context.h>
#include <kgd_kfd_interface.h>
struct amdgpu_device;
@@ -60,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+#define read_user_wptr(mmptr, wptr, dst) \
+ ({ \
+ bool valid = false; \
+ if ((mmptr) && (wptr)) { \
+ if ((mmptr) == current->mm) { \
+ valid = !get_user((dst), (wptr)); \
+ } else if (current->mm == NULL) { \
+ use_mm(mmptr); \
+ valid = !get_user((dst), (wptr)); \
+ unuse_mm(mmptr); \
+ } \
+ } \
+ valid; \
+ })
+
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5254562fd0f9..b9dbbf9cb8b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -39,6 +39,12 @@
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
enum {
MAX_TRAPID = 8, /* 3 bits in the bitfield. */
MAX_WATCH_ADDRESSES = 4
@@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr);
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -126,6 +135,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -150,7 +186,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
- .get_fw_version = get_fw_version
+ .get_fw_version = get_fw_version,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = get_tile_config,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -186,7 +224,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -290,20 +328,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
}
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr)
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t wptr_shadow, is_wptr_shadow_valid;
struct cik_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
m = get_mqd(mqd);
- is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
- if (is_wptr_shadow_valid)
- m->cp_hqd_pq_wptr = wptr_shadow;
-
acquire_queue(kgd, pipe_id, queue_id);
- gfx_v7_0_mqd_commit(adev, m);
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Copy userspace write pointer value to register.
+ * Activate doorbell logic to monitor subsequent changes.
+ */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (read_user_wptr(mm, wptr, wptr_val))
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(mmCP_HQD_ACTIVE, data);
+
release_queue(kgd);
return 0;
@@ -382,30 +438,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t temp;
- int timeout = utimeout;
+ enum hqd_dequeue_request_type type;
+ unsigned long flags, end_jiffies;
+ int retry;
acquire_queue(kgd, pipe_id, queue_id);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
- WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
temp = RREG32(mmCP_HQD_ACTIVE);
- if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
- if (timeout <= 0) {
- pr_err("kfd: cp queue preemption time out.\n");
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out\n");
release_queue(kgd);
return -ETIME;
}
- msleep(20);
- timeout -= 20;
+ usleep_range(500, 1000);
}
release_queue(kgd);
@@ -556,6 +681,16 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+ unlock_srbm(kgd);
+}
+
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -566,42 +701,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw->data;
break;
case KGD_ENGINE_ME:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.me_fw->data;
+ adev->gfx.me_fw->data;
break;
case KGD_ENGINE_CE:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw->data;
break;
case KGD_ENGINE_MEC1:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw->data;
break;
case KGD_ENGINE_MEC2:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw->data;
break;
case KGD_ENGINE_RLC:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_fw->data;
break;
case KGD_ENGINE_SDMA1:
hdr = (const union amdgpu_firmware_header *)
- adev->sdma.instance[0].fw->data;
+ adev->sdma.instance[0].fw->data;
break;
case KGD_ENGINE_SDMA2:
hdr = (const union amdgpu_firmware_header *)
- adev->sdma.instance[1].fw->data;
+ adev->sdma.instance[1].fw->data;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 133d06671e46..309f2419c6d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,6 +39,12 @@
#include "vi_structs.h"
#include "vid.h"
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
struct cik_sdma_rlc_registers;
/*
@@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr);
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
@@ -85,6 +94,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -111,7 +147,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_atc_vmid_pasid_mapping_valid =
get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
- .get_fw_version = get_fw_version
+ .get_fw_version = get_fw_version,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = get_tile_config,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -147,7 +185,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -216,7 +254,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
uint32_t mec;
uint32_t pipe;
- mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, 0, 0);
@@ -244,20 +282,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
}
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr)
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
{
- struct vi_mqd *m;
- uint32_t shadow_wptr, valid_wptr;
struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct vi_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
m = get_mqd(mqd);
- valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
- if (valid_wptr == 0)
- m->cp_hqd_pq_wptr = shadow_wptr;
-
acquire_queue(kgd, pipe_id, queue_id);
- gfx_v8_0_mqd_commit(adev, mqd);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(mmRLC_CP_SCHEDULERS);
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(mmRLC_CP_SCHEDULERS, value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+ * This is safe since EOP RPTR==WPTR for any inactive HQD
+ * on ASICs that do not support context-save.
+ * EOP writes/reads can start anywhere in the ring.
+ */
+ if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+ WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+ WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+ WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+ }
+
+ for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Copy userspace write pointer value to register.
+ * Activate doorbell logic to monitor subsequent changes.
+ */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (read_user_wptr(mm, wptr, wptr_val))
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(mmCP_HQD_ACTIVE, data);
+
release_queue(kgd);
return 0;
@@ -308,29 +393,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t temp;
- int timeout = utimeout;
+ enum hqd_dequeue_request_type type;
+ unsigned long flags, end_jiffies;
+ int retry;
+ struct vi_mqd *m = get_mqd(mqd);
acquire_queue(kgd, pipe_id, queue_id);
- WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
temp = RREG32(mmCP_HQD_ACTIVE);
- if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
- if (timeout <= 0) {
- pr_err("kfd: cp queue preemption time out.\n");
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
release_queue(kgd);
return -ETIME;
}
- msleep(20);
- timeout -= 20;
+ usleep_range(500, 1000);
}
release_queue(kgd);
@@ -444,6 +602,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
return 0;
}
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+ unlock_srbm(kgd);
+}
+
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -454,42 +622,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw->data;
break;
case KGD_ENGINE_ME:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.me_fw->data;
+ adev->gfx.me_fw->data;
break;
case KGD_ENGINE_CE:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw->data;
break;
case KGD_ENGINE_MEC1:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw->data;
break;
case KGD_ENGINE_MEC2:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw->data;
break;
case KGD_ENGINE_RLC:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_fw->data;
break;
case KGD_ENGINE_SDMA1:
hdr = (const union amdgpu_firmware_header *)
- adev->sdma.instance[0].fw->data;
+ adev->sdma.instance[0].fw->data;
break;
case KGD_ENGINE_SDMA2:
hdr = (const union amdgpu_firmware_header *)
- adev->sdma.instance[1].fw->data;
+ adev->sdma.instance[1].fw->data;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c05479ec825a..60d8bedb694d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -246,7 +246,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
}
total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
- used_vram = atomic64_read(&adev->vram_usage);
+ used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
spin_lock(&adev->mm_stats.lock);
@@ -292,7 +292,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
/* Do the same for visible VRAM if half of it is free */
if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
u64 total_vis_vram = adev->mc.visible_vram_size;
- u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+ u64 used_vis_vram =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
if (used_vis_vram < total_vis_vram) {
u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -673,10 +674,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
error_validate:
- if (r) {
- amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
+ if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
- }
error_free_pages:
@@ -724,21 +723,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
* If error is set than unvalidate buffer, otherwise just free memory
* used by parsing context.
**/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
+ bool backoff)
{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
unsigned i;
- if (!error) {
- amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
+ if (!error)
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
parser->fence);
- } else if (backoff) {
+ else if (backoff)
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
- }
for (i = 0; i < parser->num_post_dep_syncobjs; i++)
drm_syncobj_put(parser->post_dep_syncobjs[i]);
@@ -791,7 +787,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
if (amdgpu_sriov_vf(adev)) {
struct dma_fence *f;
- bo_va = vm->csa_bo_va;
+
+ bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
@@ -828,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
}
- r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);
+ r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
if (amdgpu_vm_debug && p->bo_list) {
/* Invalidate all BOs to test for userspace bugs */
@@ -1038,7 +1035,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
{
int r;
struct dma_fence *fence;
- r = drm_syncobj_fence_get(p->filp, handle, &fence);
+ r = drm_syncobj_find_fence(p->filp, handle, &fence);
if (r)
return r;
@@ -1082,6 +1079,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
GFP_KERNEL);
p->num_post_dep_syncobjs = 0;
+ if (!p->post_dep_syncobjs)
+ return -ENOMEM;
+
for (i = 0; i < num_deps; ++i) {
p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
if (!p->post_dep_syncobjs[i])
@@ -1153,7 +1153,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job);
- amdgpu_cs_parser_fini(p, 0, true);
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base);
@@ -1211,10 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
r = amdgpu_cs_submit(&parser, cs);
- if (r)
- goto out;
- return 0;
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
return r;
@@ -1490,7 +1486,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
addr > mapping->last)
continue;
- *bo = lobj->bo_va->bo;
+ *bo = lobj->bo_va->base.bo;
return mapping;
}
@@ -1499,7 +1495,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
addr > mapping->last)
continue;
- *bo = lobj->bo_va->bo;
+ *bo = lobj->bo_va->base.bo;
return mapping;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a6f6cb0f2e02..e630d918fefc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1062,11 +1062,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
}
- if (amdgpu_gart_size < 32) {
+ if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
/* gart size must be greater or equal to 32M */
dev_warn(adev->dev, "gart size (%d) too small\n",
amdgpu_gart_size);
- amdgpu_gart_size = 32;
+ amdgpu_gart_size = -1;
}
if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
@@ -1076,6 +1076,13 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
amdgpu_gtt_size = -1;
}
+ /* valid range is between 4 and 9 inclusive */
+ if (amdgpu_vm_fragment_size != -1 &&
+ (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
+ dev_warn(adev->dev, "valid range is between 4 and 9\n");
+ amdgpu_vm_fragment_size = -1;
+ }
+
amdgpu_check_vm_size(adev);
amdgpu_check_block_size(adev);
@@ -2615,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
goto err;
}
- r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
- if (r) {
- DRM_ERROR("%p bind failed\n", bo->shadow);
- goto err;
- }
-
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
NULL, fence, true);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5e9ce8a29669..0f16986ec5bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -68,14 +68,15 @@
* - 3.16.0 - Add reserved vmid support
* - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
* - 3.18.0 - Export gpu always on cu bitmap
+ * - 3.19.0 - Add support for UVD MJPEG decode
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 18
+#define KMS_DRIVER_MINOR 19
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
-unsigned amdgpu_gart_size = 256;
+int amdgpu_gart_size = -1; /* auto */
int amdgpu_gtt_size = -1; /* auto */
int amdgpu_moverate = -1; /* auto */
int amdgpu_benchmarking = 0;
@@ -94,6 +95,7 @@ unsigned amdgpu_ip_block_mask = 0xffffffff;
int amdgpu_bapm = -1;
int amdgpu_deep_color = 0;
int amdgpu_vm_size = -1;
+int amdgpu_vm_fragment_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop = 0;
int amdgpu_vm_debug = 0;
@@ -126,7 +128,7 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
-MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)");
+MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
@@ -183,6 +185,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 0444);
MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
module_param_named(vm_size, amdgpu_vm_size, int, 0444);
+MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
+module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
+
MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 94c1e2e8e34c..f4370081f6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -57,18 +57,6 @@
*/
/**
- * amdgpu_gart_set_defaults - set the default gart_size
- *
- * @adev: amdgpu_device pointer
- *
- * Set the default gart_size based on parameters and available VRAM.
- */
-void amdgpu_gart_set_defaults(struct amdgpu_device *adev)
-{
- adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20;
-}
-
-/**
* amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
*
* @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index d4cce6936200..afbe803b1a13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -56,7 +56,6 @@ struct amdgpu_gart {
const struct amdgpu_gart_funcs *gart_funcs;
};
-void amdgpu_gart_set_defaults(struct amdgpu_device *adev);
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 81127ffcefb2..7171968f261e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -225,9 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_VRAM_CLEARED|
- AMDGPU_GEM_CREATE_SHADOW |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
+ AMDGPU_GEM_CREATE_VRAM_CLEARED))
return -EINVAL;
/* reject invalid gem domains */
@@ -623,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
- r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+ r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
args->map_size);
if (r)
goto error_backoff;
@@ -643,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
- r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+ r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
args->map_size);
if (r)
goto error_backoff;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 5e6b90c6794f..0d15eb7d31d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -28,7 +28,7 @@
struct amdgpu_gtt_mgr {
struct drm_mm mm;
spinlock_t lock;
- uint64_t available;
+ atomic64_t available;
};
/**
@@ -54,7 +54,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
drm_mm_init(&mgr->mm, start, size);
spin_lock_init(&mgr->lock);
- mgr->available = p_size;
+ atomic64_set(&mgr->available, p_size);
man->priv = mgr;
return 0;
}
@@ -108,10 +108,10 @@ bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem)
*
* Allocate the address space for a node.
*/
-int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
- struct ttm_buffer_object *tbo,
- const struct ttm_place *place,
- struct ttm_mem_reg *mem)
+static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_mem_reg *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
struct amdgpu_gtt_mgr *mgr = man->priv;
@@ -143,25 +143,12 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
fpfn, lpfn, mode);
spin_unlock(&mgr->lock);
- if (!r) {
+ if (!r)
mem->start = node->start;
- if (&tbo->mem == mem)
- tbo->offset = (tbo->mem.start << PAGE_SHIFT) +
- tbo->bdev->man[tbo->mem.mem_type].gpu_offset;
- }
return r;
}
-void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
- struct amdgpu_gtt_mgr *mgr = man->priv;
-
- seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
- man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20);
-
-}
/**
* amdgpu_gtt_mgr_new - allocate a new node
*
@@ -182,11 +169,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
int r;
spin_lock(&mgr->lock);
- if (mgr->available < mem->num_pages) {
+ if (atomic64_read(&mgr->available) < mem->num_pages) {
spin_unlock(&mgr->lock);
return 0;
}
- mgr->available -= mem->num_pages;
+ atomic64_sub(mem->num_pages, &mgr->available);
spin_unlock(&mgr->lock);
node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -213,9 +200,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
return 0;
err_out:
- spin_lock(&mgr->lock);
- mgr->available += mem->num_pages;
- spin_unlock(&mgr->lock);
+ atomic64_add(mem->num_pages, &mgr->available);
return r;
}
@@ -242,30 +227,47 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man,
spin_lock(&mgr->lock);
if (node->start != AMDGPU_BO_INVALID_OFFSET)
drm_mm_remove_node(node);
- mgr->available += mem->num_pages;
spin_unlock(&mgr->lock);
+ atomic64_add(mem->num_pages, &mgr->available);
kfree(node);
mem->mm_node = NULL;
}
/**
+ * amdgpu_gtt_mgr_usage - return usage of GTT domain
+ *
+ * @man: TTM memory type manager
+ *
+ * Return how many bytes are used in the GTT domain
+ */
+uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man)
+{
+ struct amdgpu_gtt_mgr *mgr = man->priv;
+
+ return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE;
+}
+
+/**
* amdgpu_gtt_mgr_debug - dump VRAM table
*
* @man: TTM memory type manager
- * @prefix: text prefix
+ * @printer: DRM printer to use
*
* Dump the table content using printk.
*/
static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man,
- const char *prefix)
+ struct drm_printer *printer)
{
struct amdgpu_gtt_mgr *mgr = man->priv;
- struct drm_printer p = drm_debug_printer(prefix);
spin_lock(&mgr->lock);
- drm_mm_print(&mgr->mm, &p);
+ drm_mm_print(&mgr->mm, printer);
spin_unlock(&mgr->lock);
+
+ drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
+ man->size, (u64)atomic64_read(&mgr->available),
+ amdgpu_gtt_mgr_usage(man) >> 20);
}
const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 4bdd851f56d0..538e5f27d120 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -221,8 +221,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
spin_lock_init(&adev->irq.lock);
- /* Disable vblank irqs aggressively for power-saving */
- adev->ddev->vblank_disable_immediate = true;
+ if (!adev->enable_virtual_display)
+ /* Disable vblank irqs aggressively for power-saving */
+ adev->ddev->vblank_disable_immediate = true;
r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index c908f972283c..e16229000a98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -455,13 +455,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VRAM_USAGE:
- ui64 = atomic64_read(&adev->vram_usage);
+ ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VIS_VRAM_USAGE:
- ui64 = atomic64_read(&adev->vram_vis_usage);
+ ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GTT_USAGE:
- ui64 = atomic64_read(&adev->gtt_usage);
+ ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GDS_CONFIG: {
struct drm_amdgpu_info_gds gds_info;
@@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
mem.vram.total_heap_size = adev->mc.real_vram_size;
mem.vram.usable_heap_size =
adev->mc.real_vram_size - adev->vram_pin_size;
- mem.vram.heap_usage = atomic64_read(&adev->vram_usage);
+ mem.vram.heap_usage =
+ amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
@@ -506,7 +507,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
adev->mc.visible_vram_size -
(adev->vram_pin_size - adev->invisible_pin_size);
mem.cpu_accessible_vram.heap_usage =
- atomic64_read(&adev->vram_vis_usage);
+ amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.cpu_accessible_vram.max_allocation =
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
@@ -514,7 +515,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size
- adev->gart_pin_size;
- mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage);
+ mem.gtt.heap_usage =
+ amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
return copy_to_user(out, &mem,
@@ -588,11 +590,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
- dev_info.pte_fragment_size =
- (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
- AMDGPU_GPU_PAGE_SIZE;
+ dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
-
dev_info.cu_active_number = adev->gfx.cu_info.number;
dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
dev_info.ce_ram_size = adev->gfx.ce_ram_size;
@@ -841,7 +840,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
}
if (amdgpu_sriov_vf(adev)) {
- r = amdgpu_map_static_csa(adev, &fpriv->vm);
+ r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
if (r)
goto out_suspend;
}
@@ -894,8 +893,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_sriov_vf(adev)) {
/* TODO: how to handle reserve failure */
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
- amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
- fpriv->vm.csa_bo_va = NULL;
+ amdgpu_vm_bo_rmv(adev, fpriv->csa_va);
+ fpriv->csa_va = NULL;
amdgpu_bo_unreserve(adev->virt.csa_obj);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 6558a3ed57a7..3b0f2ec6eec7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -51,7 +51,7 @@ struct amdgpu_mn {
/* objects protected by lock */
struct mutex lock;
- struct rb_root objects;
+ struct rb_root_cached objects;
};
struct amdgpu_mn_node {
@@ -76,8 +76,8 @@ static void amdgpu_mn_destroy(struct work_struct *work)
mutex_lock(&adev->mn_lock);
mutex_lock(&rmn->lock);
hash_del(&rmn->node);
- rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
- it.rb) {
+ rbtree_postorder_for_each_entry_safe(node, next_node,
+ &rmn->objects.rb_root, it.rb) {
list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
bo->mn = NULL;
list_del_init(&bo->mn_list);
@@ -147,36 +147,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
/**
- * amdgpu_mn_invalidate_page - callback to notify about mm change
- *
- * @mn: our notifier
- * @mn: the mm this callback is about
- * @address: address of invalidate page
- *
- * Invalidation of a single page. Blocks for all BOs mapping it
- * and unmap them by move them into system domain again.
- */
-static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address)
-{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
- struct interval_tree_node *it;
-
- mutex_lock(&rmn->lock);
-
- it = interval_tree_iter_first(&rmn->objects, address, address);
- if (it) {
- struct amdgpu_mn_node *node;
-
- node = container_of(it, struct amdgpu_mn_node, it);
- amdgpu_mn_invalidate_node(node, address, address);
- }
-
- mutex_unlock(&rmn->lock);
-}
-
-/**
* amdgpu_mn_invalidate_range_start - callback to notify about mm change
*
* @mn: our notifier
@@ -215,7 +185,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
static const struct mmu_notifier_ops amdgpu_mn_ops = {
.release = amdgpu_mn_release,
- .invalidate_page = amdgpu_mn_invalidate_page,
.invalidate_range_start = amdgpu_mn_invalidate_range_start,
};
@@ -252,7 +221,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->mm = mm;
rmn->mn.ops = &amdgpu_mn_ops;
mutex_init(&rmn->lock);
- rmn->objects = RB_ROOT;
+ rmn->objects = RB_ROOT_CACHED;
r = __mmu_notifier_register(&rmn->mn, mm);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6e72fe7901ec..9e495da0bb03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -37,55 +37,6 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
-
-
-static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
- struct ttm_mem_reg *mem)
-{
- if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
- return 0;
-
- return ((mem->start << PAGE_SHIFT) + mem->size) >
- adev->mc.visible_vram_size ?
- adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
- mem->size;
-}
-
-static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
- struct ttm_mem_reg *old_mem,
- struct ttm_mem_reg *new_mem)
-{
- u64 vis_size;
- if (!adev)
- return;
-
- if (new_mem) {
- switch (new_mem->mem_type) {
- case TTM_PL_TT:
- atomic64_add(new_mem->size, &adev->gtt_usage);
- break;
- case TTM_PL_VRAM:
- atomic64_add(new_mem->size, &adev->vram_usage);
- vis_size = amdgpu_get_vis_part_size(adev, new_mem);
- atomic64_add(vis_size, &adev->vram_vis_usage);
- break;
- }
- }
-
- if (old_mem) {
- switch (old_mem->mem_type) {
- case TTM_PL_TT:
- atomic64_sub(old_mem->size, &adev->gtt_usage);
- break;
- case TTM_PL_VRAM:
- atomic64_sub(old_mem->size, &adev->vram_usage);
- vis_size = amdgpu_get_vis_part_size(adev, old_mem);
- atomic64_sub(vis_size, &adev->vram_vis_usage);
- break;
- }
- }
-}
-
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
@@ -94,7 +45,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
bo = container_of(tbo, struct amdgpu_bo, tbo);
amdgpu_bo_kunmap(bo);
- amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
@@ -141,7 +91,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
- places[c].lpfn = 0;
+ if (flags & AMDGPU_GEM_CREATE_SHADOW)
+ places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+ else
+ places[c].lpfn = 0;
places[c].flags = TTM_PL_FLAG_TT;
if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
places[c].flags |= TTM_PL_FLAG_WC |
@@ -496,17 +449,16 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (bo->shadow)
return 0;
- bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
- memset(&placements, 0,
- (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
-
- amdgpu_ttm_placement_init(adev, &placement,
- placements, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC);
+ memset(&placements, 0, sizeof(placements));
+ amdgpu_ttm_placement_init(adev, &placement, placements,
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_SHADOW);
r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC,
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_SHADOW,
NULL, &placement,
bo->tbo.resv,
0,
@@ -534,30 +486,28 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
{
struct ttm_placement placement = {0};
struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+ uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
int r;
- memset(&placements, 0,
- (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
-
- amdgpu_ttm_placement_init(adev, &placement,
- placements, domain, flags);
+ memset(&placements, 0, sizeof(placements));
+ amdgpu_ttm_placement_init(adev, &placement, placements,
+ domain, parent_flags);
- r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
- domain, flags, sg, &placement,
- resv, init_value, bo_ptr);
+ r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain,
+ parent_flags, sg, &placement, resv,
+ init_value, bo_ptr);
if (r)
return r;
- if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
- if (!resv) {
- r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL);
- WARN_ON(r != 0);
- }
+ if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
+ if (!resv)
+ WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
+ NULL));
r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
if (!resv)
- ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock);
+ reservation_object_unlock((*bo_ptr)->tbo.resv);
if (r)
amdgpu_bo_unref(bo_ptr);
@@ -992,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
return;
/* move_notify is called before move happens */
- amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
-
trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9b7b4fcb047b..a288fa6d72c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -33,6 +33,7 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
+/* bo virtual addresses in a vm */
struct amdgpu_bo_va_mapping {
struct list_head list;
struct rb_node rb;
@@ -43,26 +44,19 @@ struct amdgpu_bo_va_mapping {
uint64_t flags;
};
-/* bo virtual addresses in a specific vm */
+/* User space allocated BO in a VM */
struct amdgpu_bo_va {
+ struct amdgpu_vm_bo_base base;
+
/* protected by bo being reserved */
- struct list_head bo_list;
struct dma_fence *last_pt_update;
unsigned ref_count;
- /* protected by vm mutex and spinlock */
- struct list_head vm_status;
-
/* mappings for this bo_va */
struct list_head invalids;
struct list_head valids;
-
- /* constant after initialization */
- struct amdgpu_vm *vm;
- struct amdgpu_bo *bo;
};
-
struct amdgpu_bo {
/* Protected by tbo.reserved */
u32 preferred_domains;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6c5646b48d1a..5ce65280b396 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -170,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned irq_type)
{
int r;
+ int sched_hw_submission = amdgpu_sched_hw_submission;
+
+ /* Set the hw submission limit higher for KIQ because
+ * it's used for a number of gfx/compute tasks by both
+ * KFD and KGD which may have outstanding fences and
+ * it doesn't really use the gpu scheduler anyway;
+ * KIQ tasks get submitted directly to the ring.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ sched_hw_submission = max(sched_hw_submission, 256);
if (ring->adev == NULL) {
if (adev->num_rings >= AMDGPU_MAX_RINGS)
@@ -178,8 +188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev;
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
- r = amdgpu_fence_driver_init_ring(ring,
- amdgpu_sched_hw_submission);
+ r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
if (r)
return r;
}
@@ -218,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
- ring->ring_size = roundup_pow_of_two(max_dw * 4 *
- amdgpu_sched_hw_submission);
+ ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a6899180b265..c586f44312f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct dma_fence *f = e->fence;
struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+ if (dma_fence_is_signaled(f)) {
+ hash_del(&e->node);
+ dma_fence_put(f);
+ kmem_cache_free(amdgpu_sync_slab, e);
+ continue;
+ }
if (ring && s_fence) {
/* For fences from the same ring it is sufficient
* when they are scheduled.
@@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
}
}
- if (dma_fence_is_signaled(f)) {
- hash_del(&e->node);
- dma_fence_put(f);
- kmem_cache_free(amdgpu_sync_slab, e);
- continue;
- }
-
return f;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 9ab58245e518..1c88bd5e29ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,62 @@
#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
+TRACE_EVENT(amdgpu_ttm_tt_populate,
+ TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address),
+ TP_ARGS(adev, dma_address, phys_address),
+ TP_STRUCT__entry(
+ __field(uint16_t, domain)
+ __field(uint8_t, bus)
+ __field(uint8_t, slot)
+ __field(uint8_t, func)
+ __field(uint64_t, dma)
+ __field(uint64_t, phys)
+ ),
+ TP_fast_assign(
+ __entry->domain = pci_domain_nr(adev->pdev->bus);
+ __entry->bus = adev->pdev->bus->number;
+ __entry->slot = PCI_SLOT(adev->pdev->devfn);
+ __entry->func = PCI_FUNC(adev->pdev->devfn);
+ __entry->dma = dma_address;
+ __entry->phys = phys_address;
+ ),
+ TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+ (unsigned)__entry->domain,
+ (unsigned)__entry->bus,
+ (unsigned)__entry->slot,
+ (unsigned)__entry->func,
+ (unsigned long long)__entry->dma,
+ (unsigned long long)__entry->phys)
+);
+
+TRACE_EVENT(amdgpu_ttm_tt_unpopulate,
+ TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address),
+ TP_ARGS(adev, dma_address, phys_address),
+ TP_STRUCT__entry(
+ __field(uint16_t, domain)
+ __field(uint8_t, bus)
+ __field(uint8_t, slot)
+ __field(uint8_t, func)
+ __field(uint64_t, dma)
+ __field(uint64_t, phys)
+ ),
+ TP_fast_assign(
+ __entry->domain = pci_domain_nr(adev->pdev->bus);
+ __entry->bus = adev->pdev->bus->number;
+ __entry->slot = PCI_SLOT(adev->pdev->devfn);
+ __entry->func = PCI_FUNC(adev->pdev->devfn);
+ __entry->dma = dma_address;
+ __entry->phys = phys_address;
+ ),
+ TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+ (unsigned)__entry->domain,
+ (unsigned)__entry->bus,
+ (unsigned)__entry->slot,
+ (unsigned)__entry->func,
+ (unsigned long long)__entry->dma,
+ (unsigned long long)__entry->phys)
+);
+
TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
@@ -228,7 +284,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
),
TP_fast_assign(
- __entry->bo = bo_va ? bo_va->bo : NULL;
+ __entry->bo = bo_va ? bo_va->base.bo : NULL;
__entry->start = mapping->start;
__entry->last = mapping->last;
__entry->offset = mapping->offset;
@@ -252,7 +308,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
),
TP_fast_assign(
- __entry->bo = bo_va->bo;
+ __entry->bo = bo_va->base.bo;
__entry->start = mapping->start;
__entry->last = mapping->last;
__entry->offset = mapping->offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c803b082324d..7ef6c28a34d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include "amdgpu.h"
+#include "amdgpu_trace.h"
#include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -662,6 +663,38 @@ release_pages:
return r;
}
+static void amdgpu_trace_dma_map(struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ unsigned i;
+
+ if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
+ for (i = 0; i < ttm->num_pages; i++) {
+ trace_amdgpu_ttm_tt_populate(
+ adev,
+ gtt->ttm.dma_address[i],
+ page_to_phys(ttm->pages[i]));
+ }
+ }
+}
+
+static void amdgpu_trace_dma_unmap(struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ unsigned i;
+
+ if (unlikely(trace_amdgpu_ttm_tt_unpopulate_enabled())) {
+ for (i = 0; i < ttm->num_pages; i++) {
+ trace_amdgpu_ttm_tt_unpopulate(
+ adev,
+ gtt->ttm.dma_address[i],
+ page_to_phys(ttm->pages[i]));
+ }
+ }
+}
+
/* prepare the sg table with the user pages */
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
@@ -688,6 +721,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
+ amdgpu_trace_dma_map(ttm);
+
return 0;
release_sg:
@@ -721,38 +756,16 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
put_page(page);
}
- sg_free_table(ttm->sg);
-}
-
-static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- uint64_t flags;
- int r;
-
- spin_lock(&gtt->adev->gtt_list_lock);
- flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem);
- gtt->offset = (u64)mem->start << PAGE_SHIFT;
- r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
- ttm->pages, gtt->ttm.dma_address, flags);
-
- if (r) {
- DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
- ttm->num_pages, gtt->offset);
- goto error_gart_bind;
- }
-
- list_add_tail(&gtt->list, &gtt->adev->gtt_list);
-error_gart_bind:
- spin_unlock(&gtt->adev->gtt_list_lock);
- return r;
+ amdgpu_trace_dma_unmap(ttm);
+ sg_free_table(ttm->sg);
}
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
struct ttm_mem_reg *bo_mem)
{
struct amdgpu_ttm_tt *gtt = (void*)ttm;
+ uint64_t flags;
int r = 0;
if (gtt->userptr) {
@@ -772,9 +785,24 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
bo_mem->mem_type == AMDGPU_PL_OA)
return -EINVAL;
- if (amdgpu_gtt_mgr_is_allocated(bo_mem))
- r = amdgpu_ttm_do_bind(ttm, bo_mem);
+ if (!amdgpu_gtt_mgr_is_allocated(bo_mem))
+ return 0;
+ spin_lock(&gtt->adev->gtt_list_lock);
+ flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
+ r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+ ttm->pages, gtt->ttm.dma_address, flags);
+
+ if (r) {
+ DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+ ttm->num_pages, gtt->offset);
+ goto error_gart_bind;
+ }
+
+ list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+error_gart_bind:
+ spin_unlock(&gtt->adev->gtt_list_lock);
return r;
}
@@ -787,20 +815,39 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_tt *ttm = bo->ttm;
+ struct ttm_mem_reg tmp;
+
+ struct ttm_placement placement;
+ struct ttm_place placements;
int r;
if (!ttm || amdgpu_ttm_is_bound(ttm))
return 0;
- r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
- NULL, bo_mem);
- if (r) {
- DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
+ tmp = bo->mem;
+ tmp.mm_node = NULL;
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placement.num_busy_placement = 1;
+ placement.busy_placement = &placements;
+ placements.fpfn = 0;
+ placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+ placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+
+ r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
+ if (unlikely(r))
return r;
- }
- return amdgpu_ttm_do_bind(ttm, bo_mem);
+ r = ttm_bo_move_ttm(bo, true, false, &tmp);
+ if (unlikely(r))
+ ttm_bo_mem_put(bo, &tmp);
+ else
+ bo->offset = (bo->mem.start << PAGE_SHIFT) +
+ bo->bdev->man[bo->mem.mem_type].gpu_offset;
+
+ return r;
}
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
@@ -892,7 +939,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
{
- struct amdgpu_device *adev;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned i;
int r;
@@ -915,14 +962,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
ttm->state = tt_unbound;
- return 0;
+ r = 0;
+ goto trace_mappings;
}
- adev = amdgpu_ttm_adev(ttm->bdev);
-
#ifdef CONFIG_SWIOTLB
if (swiotlb_nr_tbl()) {
- return ttm_dma_populate(&gtt->ttm, adev->dev);
+ r = ttm_dma_populate(&gtt->ttm, adev->dev);
+ goto trace_mappings;
}
#endif
@@ -945,7 +992,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
return -EFAULT;
}
}
- return 0;
+
+ r = 0;
+trace_mappings:
+ if (likely(!r))
+ amdgpu_trace_dma_map(ttm);
+ return r;
}
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -966,6 +1018,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
adev = amdgpu_ttm_adev(ttm->bdev);
+ amdgpu_trace_dma_unmap(ttm);
+
#ifdef CONFIG_SWIOTLB
if (swiotlb_nr_tbl()) {
ttm_dma_unpopulate(&gtt->ttm, adev->dev);
@@ -1597,32 +1651,16 @@ error_free:
#if defined(CONFIG_DEBUG_FS)
-extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager
- *man);
static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
unsigned ttm_pl = *(int *)node->info_ent->data;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv;
- struct ttm_bo_global *glob = adev->mman.bdev.glob;
+ struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
struct drm_printer p = drm_seq_file_printer(m);
- spin_lock(&glob->lru_lock);
- drm_mm_print(mm, &p);
- spin_unlock(&glob->lru_lock);
- switch (ttm_pl) {
- case TTM_PL_VRAM:
- seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
- adev->mman.bdev.man[ttm_pl].size,
- (u64)atomic64_read(&adev->vram_usage) >> 20,
- (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
- break;
- case TTM_PL_TT:
- amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
- break;
- }
+ man->func->debug(man, &p);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 0e2399f32de7..43093bffa2cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -62,10 +62,10 @@ extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem);
-int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
- struct ttm_buffer_object *tbo,
- const struct ttm_place *place,
- struct ttm_mem_reg *mem);
+uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
+
+uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
+uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index aefecf6c1e7b..e19928dae8e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -588,6 +588,10 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
}
break;
+ case 8: /* MJPEG */
+ min_dpb_size = 0;
+ break;
+
case 16: /* H265 */
image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
image_size = ALIGN(image_size, 256);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8a081e162d13..ab05121b9272 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -46,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
* address within META_DATA init package to support SRIOV gfx preemption.
*/
-int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va)
{
- int r;
- struct amdgpu_bo_va *bo_va;
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer csa_tv;
+ int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&csa_tv.head);
@@ -69,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
return r;
}
- bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
- if (!bo_va) {
+ *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
+ if (!*bo_va) {
ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("failed to create bo_va for static CSA\n");
return -ENOMEM;
}
- r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR,
- AMDGPU_CSA_SIZE);
+ r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
+ AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
- amdgpu_vm_bo_rmv(adev, bo_va);
+ amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
- r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE,
- AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
- AMDGPU_PTE_EXECUTABLE);
+ r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
+ AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+ AMDGPU_PTE_EXECUTABLE);
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
- amdgpu_vm_bo_rmv(adev, bo_va);
+ amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
- vm->csa_bo_va = bo_va;
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index e5b1baf387c1..afcfb8bcfb65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -90,7 +90,8 @@ static inline bool is_virtual_machine(void)
struct amdgpu_vm;
int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
-int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9ce36652029e..bd20ff018512 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -159,7 +159,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
*/
static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
int (*validate)(void *, struct amdgpu_bo *),
- void *param, bool use_cpu_for_update)
+ void *param, bool use_cpu_for_update,
+ struct ttm_bo_global *glob)
{
unsigned i;
int r;
@@ -183,12 +184,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
if (r)
return r;
+ spin_lock(&glob->lru_lock);
+ ttm_bo_move_to_lru_tail(&entry->bo->tbo);
+ if (entry->bo->shadow)
+ ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
+ spin_unlock(&glob->lru_lock);
+
/*
* Recurse into the sub directory. This is harmless because we
* have only a maximum of 5 layers.
*/
r = amdgpu_vm_validate_level(entry, validate, param,
- use_cpu_for_update);
+ use_cpu_for_update, glob);
if (r)
return r;
}
@@ -220,54 +227,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return 0;
return amdgpu_vm_validate_level(&vm->root, validate, param,
- vm->use_cpu_for_update);
-}
-
-/**
- * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail
- *
- * @adev: amdgpu device instance
- * @vm: vm providing the BOs
- *
- * Move the PT BOs to the tail of the LRU.
- */
-static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent)
-{
- unsigned i;
-
- if (!parent->entries)
- return;
-
- for (i = 0; i <= parent->last_entry_used; ++i) {
- struct amdgpu_vm_pt *entry = &parent->entries[i];
-
- if (!entry->bo)
- continue;
-
- ttm_bo_move_to_lru_tail(&entry->bo->tbo);
- amdgpu_vm_move_level_in_lru(entry);
- }
+ vm->use_cpu_for_update,
+ adev->mman.bdev.glob);
}
/**
- * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
- *
- * @adev: amdgpu device instance
- * @vm: vm providing the BOs
- *
- * Move the PT BOs to the tail of the LRU.
- */
-void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- struct ttm_bo_global *glob = adev->mman.bdev.glob;
-
- spin_lock(&glob->lru_lock);
- amdgpu_vm_move_level_in_lru(&vm->root);
- spin_unlock(&glob->lru_lock);
-}
-
- /**
* amdgpu_vm_alloc_levels - allocate the PD/PT levels
*
* @adev: amdgpu_device pointer
@@ -359,7 +323,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
entry->bo = pt;
entry->addr = 0;
- entry->huge_page = false;
}
if (level < adev->vm_manager.num_level) {
@@ -899,8 +862,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
{
struct amdgpu_bo_va *bo_va;
- list_for_each_entry(bo_va, &bo->va, bo_list) {
- if (bo_va->vm == vm) {
+ list_for_each_entry(bo_va, &bo->va, base.bo_list) {
+ if (bo_va->base.vm == vm) {
return bo_va;
}
}
@@ -1074,11 +1037,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
params.func = amdgpu_vm_cpu_set_ptes;
} else {
- if (shadow) {
- r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
- if (r)
- return r;
- }
ring = container_of(vm->entity.sched, struct amdgpu_ring,
sched);
@@ -1114,22 +1072,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
if (bo == NULL)
continue;
- if (bo->shadow) {
- struct amdgpu_bo *pt_shadow = bo->shadow;
-
- r = amdgpu_ttm_bind(&pt_shadow->tbo,
- &pt_shadow->tbo.mem);
- if (r)
- return r;
- }
-
pt = amdgpu_bo_gpu_offset(bo);
pt = amdgpu_gart_get_vm_pde(adev, pt);
- if (parent->entries[pt_idx].addr == pt ||
- parent->entries[pt_idx].huge_page)
+ /* Don't update huge pages here */
+ if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
+ parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
continue;
- parent->entries[pt_idx].addr = pt;
+ parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
pde = pd_addr + pt_idx * 8;
if (((last_pde + 8 * count) != pde) ||
@@ -1307,43 +1257,52 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
*
* Check if we can update the PD with a huge page.
*/
-static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
- struct amdgpu_vm_pt *entry,
- struct amdgpu_vm_pt *parent,
- unsigned nptes, uint64_t dst,
- uint64_t flags)
+static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+ struct amdgpu_vm_pt *entry,
+ struct amdgpu_vm_pt *parent,
+ unsigned nptes, uint64_t dst,
+ uint64_t flags)
{
bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
uint64_t pd_addr, pde;
- int r;
/* In the case of a mixed PT the PDE must point to it*/
if (p->adev->asic_type < CHIP_VEGA10 ||
nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
- p->func == amdgpu_vm_do_copy_ptes ||
+ p->src ||
!(flags & AMDGPU_PTE_VALID)) {
dst = amdgpu_bo_gpu_offset(entry->bo);
dst = amdgpu_gart_get_vm_pde(p->adev, dst);
flags = AMDGPU_PTE_VALID;
} else {
+ /* Set the huge page flag to stop scanning at this PDE */
flags |= AMDGPU_PDE_PTE;
}
- if (entry->addr == dst &&
- entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
- return 0;
+ if (entry->addr == (dst | flags))
+ return;
- entry->addr = dst;
- entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+ entry->addr = (dst | flags);
if (use_cpu_update) {
- r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
- if (r)
- return r;
+ /* In case a huge page is replaced with a system
+ * memory mapping, p->pages_addr != NULL and
+ * amdgpu_vm_cpu_set_ptes would try to translate dst
+ * through amdgpu_vm_map_gart. But dst is already a
+ * GPU address (of the page table). Disable
+ * amdgpu_vm_map_gart temporarily.
+ */
+ dma_addr_t *tmp;
+
+ tmp = p->pages_addr;
+ p->pages_addr = NULL;
+ pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
pde = pd_addr + (entry - parent->entries) * 8;
amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
+
+ p->pages_addr = tmp;
} else {
if (parent->bo->shadow) {
pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
@@ -1354,8 +1313,6 @@ static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
pde = pd_addr + (entry - parent->entries) * 8;
amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
}
-
- return 0;
}
/**
@@ -1382,7 +1339,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
struct amdgpu_bo *pt;
unsigned nptes;
bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
- int r;
/* walk over the address space and update the page tables */
for (addr = start; addr < end; addr += nptes,
@@ -1398,12 +1354,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
else
nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
- r = amdgpu_vm_handle_huge_pages(params, entry, parent,
- nptes, dst, flags);
- if (r)
- return r;
-
- if (entry->huge_page)
+ amdgpu_vm_handle_huge_pages(params, entry, parent,
+ nptes, dst, flags);
+ /* We don't need to update PTEs for huge pages */
+ if (entry->addr & AMDGPU_PDE_PTE)
continue;
pt = entry->bo;
@@ -1462,9 +1416,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
* Userspace can support this by aligning virtual base address and
* allocation size to the fragment size.
*/
-
- /* SI and newer are optimized for 64KB */
- unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
+ unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
uint64_t frag_align = 1 << pages_per_frag;
@@ -1664,7 +1616,6 @@ error_free:
*
* @adev: amdgpu_device pointer
* @exclusive: fence we need to sync to
- * @gtt_flags: flags as they are used for GTT
* @pages_addr: DMA addresses to use for mapping
* @vm: requested vm
* @mapping: mapped range and flags to use for the update
@@ -1678,7 +1629,6 @@ error_free:
*/
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
- uint64_t gtt_flags,
dma_addr_t *pages_addr,
struct amdgpu_vm *vm,
struct amdgpu_bo_va_mapping *mapping,
@@ -1733,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
}
if (pages_addr) {
- if (flags == gtt_flags)
- src = adev->gart.table_addr +
- (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
- else
- max_entries = min(max_entries, 16ull * 1024ull);
+ max_entries = min(max_entries, 16ull * 1024ull);
addr = 0;
} else if (flags & AMDGPU_PTE_VALID) {
addr += adev->vm_manager.vram_base_offset;
@@ -1778,50 +1724,45 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear)
{
- struct amdgpu_vm *vm = bo_va->vm;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
dma_addr_t *pages_addr = NULL;
- uint64_t gtt_flags, flags;
struct ttm_mem_reg *mem;
struct drm_mm_node *nodes;
struct dma_fence *exclusive;
+ uint64_t flags;
int r;
- if (clear || !bo_va->bo) {
+ if (clear || !bo_va->base.bo) {
mem = NULL;
nodes = NULL;
exclusive = NULL;
} else {
struct ttm_dma_tt *ttm;
- mem = &bo_va->bo->tbo.mem;
+ mem = &bo_va->base.bo->tbo.mem;
nodes = mem->mm_node;
if (mem->mem_type == TTM_PL_TT) {
- ttm = container_of(bo_va->bo->tbo.ttm, struct
- ttm_dma_tt, ttm);
+ ttm = container_of(bo_va->base.bo->tbo.ttm,
+ struct ttm_dma_tt, ttm);
pages_addr = ttm->dma_address;
}
- exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
+ exclusive = reservation_object_get_excl(bo->tbo.resv);
}
- if (bo_va->bo) {
- flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
- gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
- adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ?
- flags : 0;
- } else {
+ if (bo)
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
+ else
flags = 0x0;
- gtt_flags = ~0x0;
- }
spin_lock(&vm->status_lock);
- if (!list_empty(&bo_va->vm_status))
+ if (!list_empty(&bo_va->base.vm_status))
list_splice_init(&bo_va->valids, &bo_va->invalids);
spin_unlock(&vm->status_lock);
list_for_each_entry(mapping, &bo_va->invalids, list) {
- r = amdgpu_vm_bo_split_mapping(adev, exclusive,
- gtt_flags, pages_addr, vm,
+ r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
mapping, flags, nodes,
&bo_va->last_pt_update);
if (r)
@@ -1838,9 +1779,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
spin_lock(&vm->status_lock);
list_splice_init(&bo_va->invalids, &bo_va->valids);
- list_del_init(&bo_va->vm_status);
+ list_del_init(&bo_va->base.vm_status);
if (clear)
- list_add(&bo_va->vm_status, &vm->cleared);
+ list_add(&bo_va->base.vm_status, &vm->cleared);
spin_unlock(&vm->status_lock);
if (vm->use_cpu_for_update) {
@@ -2034,26 +1975,26 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
}
/**
- * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
+ * amdgpu_vm_clear_moved - clear moved BOs in the PT
*
* @adev: amdgpu_device pointer
* @vm: requested vm
*
- * Make sure all invalidated BOs are cleared in the PT.
+ * Make sure all moved BOs are cleared in the PT.
* Returns 0 for success.
*
* PTs have to be reserved and mutex must be locked!
*/
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, struct amdgpu_sync *sync)
+int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_sync *sync)
{
struct amdgpu_bo_va *bo_va = NULL;
int r = 0;
spin_lock(&vm->status_lock);
- while (!list_empty(&vm->invalidated)) {
- bo_va = list_first_entry(&vm->invalidated,
- struct amdgpu_bo_va, vm_status);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved,
+ struct amdgpu_bo_va, base.vm_status);
spin_unlock(&vm->status_lock);
r = amdgpu_vm_bo_update(adev, bo_va, true);
@@ -2093,16 +2034,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
if (bo_va == NULL) {
return NULL;
}
- bo_va->vm = vm;
- bo_va->bo = bo;
+ bo_va->base.vm = vm;
+ bo_va->base.bo = bo;
+ INIT_LIST_HEAD(&bo_va->base.bo_list);
+ INIT_LIST_HEAD(&bo_va->base.vm_status);
+
bo_va->ref_count = 1;
- INIT_LIST_HEAD(&bo_va->bo_list);
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
- INIT_LIST_HEAD(&bo_va->vm_status);
if (bo)
- list_add_tail(&bo_va->bo_list, &bo->va);
+ list_add_tail(&bo_va->base.bo_list, &bo->va);
return bo_va;
}
@@ -2127,7 +2069,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
uint64_t size, uint64_t flags)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
- struct amdgpu_vm *vm = bo_va->vm;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
/* validate the parameters */
@@ -2138,7 +2081,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
- (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo)))
+ (bo && offset + size > amdgpu_bo_size(bo)))
return -EINVAL;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2148,7 +2091,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
if (tmp) {
/* bo and tmp overlap, invalid addr */
dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
- "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr,
+ "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
tmp->start, tmp->last + 1);
return -EINVAL;
}
@@ -2193,7 +2136,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
uint64_t size, uint64_t flags)
{
struct amdgpu_bo_va_mapping *mapping;
- struct amdgpu_vm *vm = bo_va->vm;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
int r;
@@ -2205,7 +2149,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
- (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo)))
+ (bo && offset + size > amdgpu_bo_size(bo)))
return -EINVAL;
/* Allocate all the needed memory */
@@ -2213,7 +2157,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
if (!mapping)
return -ENOMEM;
- r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size);
+ r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
if (r) {
kfree(mapping);
return r;
@@ -2253,7 +2197,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
uint64_t saddr)
{
struct amdgpu_bo_va_mapping *mapping;
- struct amdgpu_vm *vm = bo_va->vm;
+ struct amdgpu_vm *vm = bo_va->base.vm;
bool valid = true;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2401,12 +2345,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va)
{
struct amdgpu_bo_va_mapping *mapping, *next;
- struct amdgpu_vm *vm = bo_va->vm;
+ struct amdgpu_vm *vm = bo_va->base.vm;
- list_del(&bo_va->bo_list);
+ list_del(&bo_va->base.bo_list);
spin_lock(&vm->status_lock);
- list_del(&bo_va->vm_status);
+ list_del(&bo_va->base.vm_status);
spin_unlock(&vm->status_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
@@ -2438,13 +2382,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo)
{
- struct amdgpu_bo_va *bo_va;
+ struct amdgpu_vm_bo_base *bo_base;
- list_for_each_entry(bo_va, &bo->va, bo_list) {
- spin_lock(&bo_va->vm->status_lock);
- if (list_empty(&bo_va->vm_status))
- list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
- spin_unlock(&bo_va->vm->status_lock);
+ list_for_each_entry(bo_base, &bo->va, bo_list) {
+ spin_lock(&bo_base->vm->status_lock);
+ if (list_empty(&bo_base->vm_status))
+ list_add(&bo_base->vm_status,
+ &bo_base->vm->moved);
+ spin_unlock(&bo_base->vm->status_lock);
}
}
@@ -2462,12 +2407,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
}
/**
- * amdgpu_vm_adjust_size - adjust vm size and block size
+ * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
+ *
+ * @adev: amdgpu_device pointer
+ * @fragment_size_default: the default fragment size if it's set auto
+ */
+void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
+{
+ if (amdgpu_vm_fragment_size == -1)
+ adev->vm_manager.fragment_size = fragment_size_default;
+ else
+ adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
+}
+
+/**
+ * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
*
* @adev: amdgpu_device pointer
* @vm_size: the default vm size if it's set auto
*/
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
{
/* adjust vm size firstly */
if (amdgpu_vm_size == -1)
@@ -2482,8 +2441,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
else
adev->vm_manager.block_size = amdgpu_vm_block_size;
- DRM_INFO("vm size is %llu GB, block size is %u-bit\n",
- adev->vm_manager.vm_size, adev->vm_manager.block_size);
+ amdgpu_vm_set_fragment_size(adev, fragment_size_default);
+
+ DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
+ adev->vm_manager.vm_size, adev->vm_manager.block_size,
+ adev->vm_manager.fragment_size);
}
/**
@@ -2507,12 +2469,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
u64 flags;
uint64_t init_pde_value = 0;
- vm->va = RB_ROOT;
+ vm->va = RB_ROOT_CACHED;
vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
spin_lock_init(&vm->status_lock);
- INIT_LIST_HEAD(&vm->invalidated);
+ INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
@@ -2628,10 +2590,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amd_sched_entity_fini(vm->entity.sched, &vm->entity);
- if (!RB_EMPTY_ROOT(&vm->va)) {
+ if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
dev_err(adev->dev, "still active bo inside vm\n");
}
- rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) {
+ rbtree_postorder_for_each_entry_safe(mapping, tmp,
+ &vm->va.rb_root, rb) {
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
kfree(mapping);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 217ecba8f4cc..6716355403ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -50,11 +50,6 @@ struct amdgpu_bo_list_entry;
/* PTBs (Page Table Blocks) need to be aligned to 32K */
#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
-/* LOG2 number of continuous pages for the fragment field */
-#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
- ((adev)->asic_type < CHIP_VEGA10 ? 4 : \
- (adev)->vm_manager.block_size)
-
#define AMDGPU_PTE_VALID (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_SNOOPED (1ULL << 2)
@@ -99,11 +94,22 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
+/* base structure for tracking BO usage in a VM */
+struct amdgpu_vm_bo_base {
+ /* constant after initialization */
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo *bo;
+
+ /* protected by bo being reserved */
+ struct list_head bo_list;
+
+ /* protected by spinlock */
+ struct list_head vm_status;
+};
struct amdgpu_vm_pt {
struct amdgpu_bo *bo;
uint64_t addr;
- bool huge_page;
/* array of page tables, one for each directory entry */
struct amdgpu_vm_pt *entries;
@@ -112,13 +118,13 @@ struct amdgpu_vm_pt {
struct amdgpu_vm {
/* tree of virtual addresses mapped */
- struct rb_root va;
+ struct rb_root_cached va;
/* protecting invalidated */
spinlock_t status_lock;
/* BOs moved, but not yet updated in the PT */
- struct list_head invalidated;
+ struct list_head moved;
/* BOs cleared in the PT because of a move */
struct list_head cleared;
@@ -141,8 +147,6 @@ struct amdgpu_vm {
u64 client_id;
/* dedicated to vm */
struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
- /* each VM will map on CSA */
- struct amdgpu_bo_va *csa_bo_va;
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
bool use_cpu_for_update;
@@ -191,6 +195,7 @@ struct amdgpu_vm_manager {
uint32_t num_level;
uint64_t vm_size;
uint32_t block_size;
+ uint32_t fragment_size;
/* vram base address for page table entry */
u64 vram_base_offset;
/* vm pte handling */
@@ -223,8 +228,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*callback)(void *p, struct amdgpu_bo *bo),
void *param);
-void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
uint64_t saddr, uint64_t size);
@@ -240,8 +243,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct amdgpu_sync *sync);
+int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_sync *sync);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
@@ -268,7 +271,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
uint64_t saddr, uint64_t size);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
+void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
+ uint32_t fragment_size_default);
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
+ uint32_t fragment_size_default);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a2c59a08b2bd..26e900627971 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -28,6 +28,8 @@
struct amdgpu_vram_mgr {
struct drm_mm mm;
spinlock_t lock;
+ atomic64_t usage;
+ atomic64_t vis_usage;
};
/**
@@ -79,6 +81,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
}
/**
+ * amdgpu_vram_mgr_vis_size - Calculate visible node size
+ *
+ * @adev: amdgpu device structure
+ * @node: MM node structure
+ *
+ * Calculate how many bytes of the MM node are inside visible VRAM
+ */
+static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
+ struct drm_mm_node *node)
+{
+ uint64_t start = node->start << PAGE_SHIFT;
+ uint64_t end = (node->size + node->start) << PAGE_SHIFT;
+
+ if (start >= adev->mc.visible_vram_size)
+ return 0;
+
+ return (end > adev->mc.visible_vram_size ?
+ adev->mc.visible_vram_size : end) - start;
+}
+
+/**
* amdgpu_vram_mgr_new - allocate new ranges
*
* @man: TTM memory type manager
@@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
const struct ttm_place *place,
struct ttm_mem_reg *mem)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
struct amdgpu_vram_mgr *mgr = man->priv;
struct drm_mm *mm = &mgr->mm;
struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode;
unsigned long lpfn, num_nodes, pages_per_node, pages_left;
+ uint64_t usage = 0, vis_usage = 0;
unsigned i;
int r;
@@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
if (unlikely(r))
goto error;
+ usage += nodes[i].size << PAGE_SHIFT;
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
+
/* Calculate a virtual BO start address to easily check if
* everything is CPU accessible.
*/
@@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
}
spin_unlock(&mgr->lock);
+ atomic64_add(usage, &mgr->usage);
+ atomic64_add(vis_usage, &mgr->vis_usage);
+
mem->mm_node = nodes;
return 0;
@@ -181,8 +212,10 @@ error:
static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
struct ttm_mem_reg *mem)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
struct amdgpu_vram_mgr *mgr = man->priv;
struct drm_mm_node *nodes = mem->mm_node;
+ uint64_t usage = 0, vis_usage = 0;
unsigned pages = mem->num_pages;
if (!mem->mm_node)
@@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
while (pages) {
pages -= nodes->size;
drm_mm_remove_node(nodes);
+ usage += nodes->size << PAGE_SHIFT;
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
++nodes;
}
spin_unlock(&mgr->lock);
+ atomic64_sub(usage, &mgr->usage);
+ atomic64_sub(vis_usage, &mgr->vis_usage);
+
kfree(mem->mm_node);
mem->mm_node = NULL;
}
/**
+ * amdgpu_vram_mgr_usage - how many bytes are used in this domain
+ *
+ * @man: TTM memory type manager
+ *
+ * Returns how many bytes are used in this domain.
+ */
+uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man)
+{
+ struct amdgpu_vram_mgr *mgr = man->priv;
+
+ return atomic64_read(&mgr->usage);
+}
+
+/**
+ * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
+ *
+ * @man: TTM memory type manager
+ *
+ * Returns how many bytes are used in the visible part of VRAM
+ */
+uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man)
+{
+ struct amdgpu_vram_mgr *mgr = man->priv;
+
+ return atomic64_read(&mgr->vis_usage);
+}
+
+/**
* amdgpu_vram_mgr_debug - dump VRAM table
*
* @man: TTM memory type manager
- * @prefix: text prefix
+ * @printer: DRM printer to use
*
* Dump the table content using printk.
*/
static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
- const char *prefix)
+ struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = man->priv;
- struct drm_printer p = drm_debug_printer(prefix);
spin_lock(&mgr->lock);
- drm_mm_print(&mgr->mm, &p);
+ drm_mm_print(&mgr->mm, printer);
spin_unlock(&mgr->lock);
+
+ drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
+ man->size, amdgpu_vram_mgr_usage(man) >> 20,
+ amdgpu_vram_mgr_vis_usage(man) >> 20);
}
const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 53a4af7596c1..00868764a0dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1921,6 +1921,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
ELEMENT_SIZE, 1);
sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
INDEX_STRIDE, 3);
+ WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
@@ -1934,7 +1935,6 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
WREG32(mmSH_MEM_APE1_BASE, 1);
WREG32(mmSH_MEM_APE1_LIMIT, 0);
WREG32(mmSH_MEM_BASES, sh_mem_base);
- WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0710b0b2e4b6..fc260c13b1da 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3707,6 +3707,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
ELEMENT_SIZE, 1);
sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
INDEX_STRIDE, 3);
+ WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
+
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
vi_srbm_select(adev, 0, 0, 0, i);
@@ -3730,7 +3732,6 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
WREG32(mmSH_MEM_APE1_BASE, 1);
WREG32(mmSH_MEM_APE1_LIMIT, 0);
- WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
}
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -4578,9 +4579,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
mqd->compute_misc_reserved = 0x00000003;
if (!(adev->flags & AMD_IS_APU)) {
mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
- + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+ + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
- + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+ + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
}
eop_base_addr = ring->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
@@ -4767,8 +4768,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
} else {
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
- ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
- ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
+ ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_mqd_init(ring);
@@ -4791,8 +4792,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
- ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
- ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
+ ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v8_0_mqd_init(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b39f81dda847..69182eeca264 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2157,7 +2157,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
const struct cs_section_def *sect = NULL;
const struct cs_extent_def *ext = NULL;
- int r, i;
+ int r, i, tmp;
/* init the CP */
WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
@@ -2165,7 +2165,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v9_0_cp_gfx_enable(adev, true);
- r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4);
+ r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
return r;
@@ -2203,6 +2203,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_write(ring, 0x8000);
amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
+ tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0);
+
amdgpu_ring_commit(ring);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 408723ef157c..6c8040e616c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -144,8 +144,8 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 93c45f26b7c8..5be9c83dfcf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -332,7 +332,24 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
adev->mc.visible_vram_size = adev->mc.aper_size;
- amdgpu_gart_set_defaults(adev);
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_HAINAN: /* no MM engines */
+ default:
+ adev->mc.gart_size = 256ULL << 20;
+ break;
+ case CHIP_VERDE: /* UVD, VCE do not support GPUVM */
+ case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */
+ case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
+ case CHIP_OLAND: /* UVD, VCE do not support GPUVM */
+ adev->mc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
+ adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
gmc_v6_0_vram_gtt_location(adev, &adev->mc);
return 0;
@@ -461,6 +478,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
{
int r, i;
+ u32 field;
if (adev->gart.robj == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -488,10 +506,12 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL2,
VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK |
VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK);
+
+ field = adev->vm_manager.fragment_size;
WREG32(mmVM_L2_CNTL3,
VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
- (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
- (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
+ (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
+ (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
@@ -811,7 +831,7 @@ static int gmc_v6_0_sw_init(void *handle)
if (r)
return r;
- amdgpu_vm_adjust_size(adev, 64);
+ amdgpu_vm_adjust_size(adev, 64, 4);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
adev->mc.mc_mask = 0xffffffffffULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 4a9e84062874..eace9e7182c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -386,7 +386,27 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
adev->mc.visible_vram_size = adev->mc.real_vram_size;
- amdgpu_gart_set_defaults(adev);
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ: /* no MM engines */
+ default:
+ adev->mc.gart_size = 256ULL << 20;
+ break;
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
+ case CHIP_HAWAII: /* UVD, VCE do not support GPUVM */
+ case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */
+ case CHIP_KABINI: /* UVD, VCE do not support GPUVM */
+ case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
+ adev->mc.gart_size = 1024ULL << 20;
+ break;
+#endif
+ }
+ } else {
+ adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
gmc_v7_0_vram_gtt_location(adev, &adev->mc);
return 0;
@@ -562,7 +582,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
{
int r, i;
- u32 tmp;
+ u32 tmp, field;
if (adev->gart.robj == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -592,10 +612,12 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32(mmVM_L2_CNTL2, tmp);
+
+ field = adev->vm_manager.fragment_size;
tmp = RREG32(mmVM_L2_CNTL3);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
WREG32(mmVM_L2_CNTL3, tmp);
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
@@ -948,7 +970,7 @@ static int gmc_v7_0_sw_init(void *handle)
* Currently set to 4GB ((1 << 20) 4k pages).
* Max GPUVM size for cayman and SI is 40 bits.
*/
- amdgpu_vm_adjust_size(adev, 64);
+ amdgpu_vm_adjust_size(adev, 64, 4);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
/* Set the internal MC address mask
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 85c937b5e40b..3b3326daf32b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -562,7 +562,26 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
adev->mc.visible_vram_size = adev->mc.real_vram_size;
- amdgpu_gart_set_defaults(adev);
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_POLARIS11: /* all engines support GPUVM */
+ case CHIP_POLARIS10: /* all engines support GPUVM */
+ case CHIP_POLARIS12: /* all engines support GPUVM */
+ default:
+ adev->mc.gart_size = 256ULL << 20;
+ break;
+ case CHIP_TONGA: /* UVD, VCE do not support GPUVM */
+ case CHIP_FIJI: /* UVD, VCE do not support GPUVM */
+ case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
+ case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */
+ adev->mc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
+ adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
gmc_v8_0_vram_gtt_location(adev, &adev->mc);
return 0;
@@ -762,7 +781,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
{
int r, i;
- u32 tmp;
+ u32 tmp, field;
if (adev->gart.robj == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -793,10 +812,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32(mmVM_L2_CNTL2, tmp);
+
+ field = adev->vm_manager.fragment_size;
tmp = RREG32(mmVM_L2_CNTL3);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
WREG32(mmVM_L2_CNTL3, tmp);
/* XXX: set to enable PTE/PDE in system memory */
tmp = RREG32(mmVM_L2_CNTL4);
@@ -1046,7 +1067,7 @@ static int gmc_v8_0_sw_init(void *handle)
* Currently set to 4GB ((1 << 20) 4k pages).
* Max GPUVM size for cayman and SI is 40 bits.
*/
- amdgpu_vm_adjust_size(adev, 64);
+ amdgpu_vm_adjust_size(adev, 64, 4);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
/* Set the internal MC address mask
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c22899a08106..d04d0b123212 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -499,7 +499,21 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
adev->mc.visible_vram_size = adev->mc.real_vram_size;
- amdgpu_gart_set_defaults(adev);
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_VEGA10: /* all engines support GPUVM */
+ default:
+ adev->mc.gart_size = 256ULL << 20;
+ break;
+ case CHIP_RAVEN: /* DCE SG support */
+ adev->mc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
+ adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
gmc_v9_0_vram_gtt_location(adev, &adev->mc);
return 0;
@@ -541,9 +555,10 @@ static int gmc_v9_0_sw_init(void *handle)
adev->vm_manager.vm_size = 1U << 18;
adev->vm_manager.block_size = 9;
adev->vm_manager.num_level = 3;
+ amdgpu_vm_set_fragment_size(adev, 9);
} else {
- /* vm_size is 64GB for legacy 2-level page support*/
- amdgpu_vm_adjust_size(adev, 64);
+ /* vm_size is 64GB for legacy 2-level page support */
+ amdgpu_vm_adjust_size(adev, 64, 9);
adev->vm_manager.num_level = 1;
}
break;
@@ -558,14 +573,16 @@ static int gmc_v9_0_sw_init(void *handle)
adev->vm_manager.vm_size = 1U << 18;
adev->vm_manager.block_size = 9;
adev->vm_manager.num_level = 3;
+ amdgpu_vm_set_fragment_size(adev, 9);
break;
default:
break;
}
- DRM_INFO("vm size is %llu GB, block size is %u-bit\n",
+ DRM_INFO("vm size is %llu GB, block size is %u-bit,fragment size is %u-bit\n",
adev->vm_manager.vm_size,
- adev->vm_manager.block_size);
+ adev->vm_manager.block_size,
+ adev->vm_manager.fragment_size);
/* This interrupt is VMC page fault.*/
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index ad8def3cc343..74cb647da30e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -158,8 +158,8 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index e79befd80eed..7f408f85fdb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -250,6 +250,7 @@
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
#define PACKET3_SCRATCH_RAM_READ 0x7E
#define PACKET3_LOAD_CONST_RAM 0x80
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 6cac291c96da..9ff69b90df36 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1028,8 +1028,7 @@ static int vi_common_early_init(void *handle)
/* rev0 hardware requires workarounds to support PG */
adev->pg_flags = 0;
if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) {
- adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_GFX_SMG |
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG |
AMD_PG_SUPPORT_GFX_PIPELINE |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_UVD |