summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c254
1 files changed, 243 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 34471dbaa872..a73e1903d29b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = {
{0, NULL},
};
+static const struct hwmon_temp_label {
+ enum PP_HWMON_TEMP channel;
+ const char *label;
+} temp_label[] = {
+ {PP_TEMP_EDGE, "edge"},
+ {PP_TEMP_JUNCTION, "junction"},
+ {PP_TEMP_MEM, "mem"},
+};
+
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
{
if (adev->pm.dpm_enabled) {
@@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
pr_debug("featuremask = 0x%llx\n", featuremask);
- if (adev->powerplay.pp_funcs->set_ppfeature_status) {
+ if (is_support_sw_smu(adev)) {
+ ret = smu_set_ppfeature_status(&adev->smu, featuremask);
+ if (ret)
+ return -EINVAL;
+ } else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
if (ret)
return -EINVAL;
@@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- if (adev->powerplay.pp_funcs->get_ppfeature_status)
+ if (is_support_sw_smu(adev)) {
+ return smu_get_ppfeature_status(&adev->smu, buf);
+ } else if (adev->powerplay.pp_funcs->get_ppfeature_status)
return amdgpu_dpm_get_ppfeature_status(adev, buf);
return snprintf(buf, PAGE_SIZE, "\n");
@@ -1303,6 +1318,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
}
/**
+ * DOC: mem_busy_percent
+ *
+ * The amdgpu driver provides a sysfs API for reading how busy the VRAM
+ * is as a percentage. The file mem_busy_percent is used for this.
+ * The SMU firmware computes a percentage of load based on the
+ * aggregate activity level in the IP cores.
+ */
+static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int r, value, size = sizeof(value);
+
+ /* read the IP busy sensor */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
+ (void *)&value, &size);
+
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+/**
* DOC: pcie_bw
*
* The amdgpu driver provides a sysfs API for estimating how much data
@@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
count0, count1, pcie_get_mps(adev->pdev));
}
+/**
+ * DOC: unique_id
+ *
+ * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU
+ * The file unique_id is used for this.
+ * This will provide a Unique ID that will persist from machine to machine
+ *
+ * NOTE: This will only work for GFX9 and newer. This file will be absent
+ * on unsupported ASICs (GFX8 and older)
+ */
+static ssize_t amdgpu_get_unique_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ if (adev->unique_id)
+ return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
+
+ return 0;
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
+static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
+ amdgpu_get_memory_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
amdgpu_get_ppfeature_status,
amdgpu_set_ppfeature_status);
+static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
+ int channel = to_sensor_dev_attr(attr)->index;
int r, temp, size = sizeof(temp);
/* Can't get temperature when the card is off */
@@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
- /* get the temperature */
- r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
- (void *)&temp, &size);
- if (r)
- return r;
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ switch (channel) {
+ case PP_TEMP_JUNCTION:
+ /* get current junction temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_EDGE:
+ /* get current edge temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_MEM:
+ /* get current memory temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ }
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
+static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_hotspot_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_mem_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_mem_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int channel = to_sensor_dev_attr(attr)->index;
+
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int channel = to_sensor_dev_attr(attr)->index;
+ int temp = 0;
+
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ switch (channel) {
+ case PP_TEMP_JUNCTION:
+ temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
+ break;
+ case PP_TEMP_EDGE:
+ temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
+ break;
+ case PP_TEMP_MEM:
+ temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
+ break;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* hwmon interfaces for GPU temperature:
*
- * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
+ * - temp2_input and temp3_input are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_label: temperature channel label
+ * - temp2_label and temp3_label are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
+ * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
*
- * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
*
- * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
+ * - these are supported on SOC15 dGPUs only
*
* hwmon interfaces for GPU voltage:
*
@@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
*/
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp2_input.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp3_input.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp1_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp2_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp3_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
+ &sensor_dev_attr_temp2_label.dev_attr.attr,
+ &sensor_dev_attr_temp3_label.dev_attr.attr,
&sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
return 0;
+ /* only SOC15 dGPUs support hotspot and mem temperatures */
+ if (((adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_VEGA10) &&
+ (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+ return 0;
+
return effective_mode;
}
@@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"gpu_busy_level\n");
return ret;
}
+ /* APU does not have its own dedicated memory */
+ if (!(adev->flags & AMD_IS_APU)) {
+ ret = device_create_file(adev->dev,
+ &dev_attr_mem_busy_percent);
+ if (ret) {
+ DRM_ERROR("failed to create device file "
+ "mem_busy_percent\n");
+ return ret;
+ }
+ }
/* PCIe Perf counters won't work on APU nodes */
if (!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
@@ -2620,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
}
+ if (adev->unique_id)
+ ret = device_create_file(adev->dev, &dev_attr_unique_id);
+ if (ret) {
+ DRM_ERROR("failed to create device file unique_id\n");
+ return ret;
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -2678,7 +2902,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
if (!(adev->flags & AMD_IS_APU))
+ device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
+ if (!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_pcie_bw);
+ if (adev->unique_id)
+ device_remove_file(adev->dev, &dev_attr_unique_id);
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_ppfeatures);
@@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
/* GPU Load */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
seq_printf(m, "GPU Load: %u %%\n", value);
+ /* MEM Load */
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
+ seq_printf(m, "MEM Load: %u %%\n", value);
+
seq_printf(m, "\n");
/* SMC feature mask */