Merge branch 'pm-tools'

Merge power management utilities updates for 6.18-rc1:

 - Fix and clean up the x86_energy_perf_policy utility and update its
   documentation (Len Brown, Kaushlendra Kumar)

 - Fix incorrect sorting of PMT telemetry in turbostat (Kaushlendra
   Kumar)

 - Fix incorrect size in cpuidle_state_disable() and the error return
   value of cpupower_write_sysfs() in cpupower (Kaushlendra Kumar)

* pm-tools:
  tools/power x86_energy_perf_policy.8: Emphasize preference for SW interfaces
  tools/power x86_energy_perf_policy: Add make snapshot target
  tools/power x86_energy_perf_policy: Prefer driver HWP limits
  tools/power x86_energy_perf_policy: EPB access is only via sysfs
  tools/power x86_energy_perf_policy: Prepare for MSR/sysfs refactoring
  tools/power x86_energy_perf_policy: Enhance HWP enable
  tools/power x86_energy_perf_policy: Enhance HWP enabled check
  tools/power x86_energy_perf_policy: Fix incorrect fopen mode usage
  tools/power turbostat: Fix incorrect sorting of PMT telemetry
  tools/cpupower: Fix incorrect size in cpuidle_state_disable()
  tools/cpupower: fix error return value in cpupower_write_sysfs()
This commit is contained in:
Rafael J. Wysocki 2025-09-29 13:10:10 +02:00
commit 40d2cf9c3c
6 changed files with 135 additions and 55 deletions

View File

@ -233,6 +233,7 @@ int cpuidle_state_disable(unsigned int cpu,
{
char value[SYSFS_PATH_MAX];
int bytes_written;
int len;
if (cpuidle_state_count(cpu) <= idlestate)
return -1;
@ -241,10 +242,10 @@ int cpuidle_state_disable(unsigned int cpu,
idlestate_value_files[IDLESTATE_DISABLE]))
return -2;
snprintf(value, SYSFS_PATH_MAX, "%u", disable);
len = snprintf(value, SYSFS_PATH_MAX, "%u", disable);
bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable",
value, sizeof(disable));
value, len);
if (bytes_written)
return 0;
return -3;

View File

@ -56,7 +56,7 @@ unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen)
if (numwritten < 1) {
perror(path);
close(fd);
return -1;
return 0;
}
close(fd);

View File

@ -1890,7 +1890,7 @@ int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
sscanf((*a)->d_name, "telem%u", &aidx);
sscanf((*b)->d_name, "telem%u", &bidx);
return aidx >= bidx;
return (aidx > bidx) ? 1 : (aidx < bidx) ? -1 : 0;
}
const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)

View File

@ -1,8 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
CC = $(CROSS_COMPILE)gcc
BUILD_OUTPUT := $(CURDIR)
BUILD_OUTPUT := $(CURDIR)
PREFIX := /usr
DESTDIR :=
DAY := $(shell date +%Y.%m.%d)
SNAPSHOT = x86_energy_perf_policy-$(DAY)
ifeq ("$(origin O)", "command line")
BUILD_OUTPUT := $(O)
@ -27,3 +31,26 @@ install : x86_energy_perf_policy
install -d $(DESTDIR)$(PREFIX)/share/man/man8
install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
# snapshot: build a stand-alone tarball, $(SNAPSHOT).tar.gz.
# Recreates the $(SNAPSHOT) directory, copies the binary, Makefile, source and
# man page into it, and generates local msr-index.h, bits.h and build_bug.h
# there. The copied Makefile is then overwritten: PWD and the CFLAGS lines that
# point MSRHEADER and BUILD_BUG_HEADER at the local headers come first,
# followed by this Makefile with every line mentioning MSRHEADER blanked out.
snapshot: x86_energy_perf_policy
@rm -rf $(SNAPSHOT)
@mkdir $(SNAPSHOT)
@cp x86_energy_perf_policy Makefile x86_energy_perf_policy.c x86_energy_perf_policy.8 $(SNAPSHOT)
@sed -e 's/^#include <linux\/bits.h>/#include "bits.h"/' -e 's/u64/unsigned long long/' ../../../../arch/x86/include/asm/msr-index.h > $(SNAPSHOT)/msr-index.h
@echo '#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))' >> $(SNAPSHOT)/msr-index.h
@echo "#define BIT(x) (1 << (x))" > $(SNAPSHOT)/bits.h
@echo "#define BIT_ULL(nr) (1ULL << (nr))" >> $(SNAPSHOT)/bits.h
@echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
@echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
@echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h
@echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h
@echo PWD=. > $(SNAPSHOT)/Makefile
@echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile
@echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile
@sed -e's/.*MSRHEADER.*//' Makefile >> $(SNAPSHOT)/Makefile
@rm -f $(SNAPSHOT).tar.gz
tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT)

View File

@ -2,7 +2,7 @@
.\" Distributed under the GPL, Copyleft 1994.
.TH X86_ENERGY_PERF_POLICY 8
.SH NAME
x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
x86_energy_perf_policy \- Manage Energy vs. Performance Policy
.SH SYNOPSIS
.B x86_energy_perf_policy
.RB "[ options ] [ scope ] [field \ value]"
@ -19,9 +19,14 @@ x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Spe
.SH DESCRIPTION
\fBx86_energy_perf_policy\fP
displays and updates energy-performance policy settings specific to
Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR)
updates, no matter if the Linux cpufreq sub-system is enabled or not.
Intel Architecture Processors. It summarizes settings available
in standard Linux interfaces (e.g. cpufreq),
and also decodes underlying Model Specific Registers (MSRs).
While \fBx86_energy_perf_policy\fP can manage energy-performance policy
using only MSR access, it prefers standard
Linux kernel interfaces, when they are available.
.SH BACKGROUND
Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
may affect a wide range of hardware decisions,
such as how aggressively the hardware enters and exits CPU idle states (C-states)
@ -200,7 +205,9 @@ runs only as root.
.SH FILES
.ta
.nf
/dev/cpu/*/msr
EPB: /sys/devices/system/cpu/cpu*/power/energy_perf_bias
EPP: /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference
MSR: /dev/cpu/*/msr
.fi
.SH "SEE ALSO"
.nf

View File

@ -4,7 +4,7 @@
* policy preference bias on recent X86 processors.
*/
/*
* Copyright (c) 2010 - 2017 Intel Corporation.
* Copyright (c) 2010 - 2025 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@ -62,6 +62,7 @@ unsigned char turbo_update_value;
unsigned char update_hwp_epp;
unsigned char update_hwp_min;
unsigned char update_hwp_max;
unsigned char hwp_limits_done_via_sysfs;
unsigned char update_hwp_desired;
unsigned char update_hwp_window;
unsigned char update_hwp_use_pkg;
@ -517,7 +518,7 @@ void for_packages(unsigned long long pkg_set, int (func)(int))
void print_version(void)
{
printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n");
printf("x86_energy_perf_policy 2025.9.19 Len Brown <lenb@kernel.org>\n");
}
void cmdline(int argc, char **argv)
@ -630,7 +631,7 @@ void cmdline(int argc, char **argv)
*/
FILE *fopen_or_die(const char *path, const char *mode)
{
FILE *filep = fopen(path, "r");
FILE *filep = fopen(path, mode);
if (!filep)
err(1, "%s: open failed", path);
@ -644,7 +645,7 @@ void err_on_hypervisor(void)
char *buffer;
/* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */
cpuinfo = fopen_or_die("/proc/cpuinfo", "ro");
cpuinfo = fopen_or_die("/proc/cpuinfo", "r");
buffer = malloc(4096);
if (!buffer) {
@ -809,7 +810,7 @@ void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str)
h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7);
}
void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
void read_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
unsigned long long msr;
@ -823,7 +824,7 @@ void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr
hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1);
}
void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
void write_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
unsigned long long msr = 0;
@ -843,7 +844,7 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms
put_msr(cpu, msr_offset, msr);
}
static int get_epb(int cpu)
static int get_epb_sysfs(int cpu)
{
char path[SYSFS_PATH_MAX];
char linebuf[3];
@ -865,7 +866,7 @@ static int get_epb(int cpu)
return (int)val;
}
static int set_epb(int cpu, int val)
static int set_epb_sysfs(int cpu, int val)
{
char path[SYSFS_PATH_MAX];
char linebuf[3];
@ -895,14 +896,14 @@ int print_cpu_msrs(int cpu)
struct msr_hwp_cap cap;
int epb;
epb = get_epb(cpu);
epb = get_epb_sysfs(cpu);
if (epb >= 0)
printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb);
if (!has_hwp)
return 0;
read_hwp_request(cpu, &req, MSR_HWP_REQUEST);
read_hwp_request_msr(cpu, &req, MSR_HWP_REQUEST);
print_hwp_request(cpu, &req, "");
read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
@ -919,7 +920,7 @@ int print_pkg_msrs(int pkg)
if (!has_hwp)
return 0;
read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
read_hwp_request_msr(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
print_hwp_request_pkg(pkg, &req, "");
if (has_hwp_notify) {
@ -951,8 +952,10 @@ int ratio_2_sysfs_khz(int ratio)
}
/*
* If HWP is enabled and cpufreq sysfs attributes are present,
* then update sysfs, so that it will not become
* stale when we write to MSRs.
* then update via sysfs. The intel_pstate driver may modify (clip)
* this request, say, when HWP_CAP is outside of PLATFORM_INFO limits,
* and the driver-chosen value takes precedence.
*
* (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq,
* so we don't have to touch that.)
*/
@ -1007,6 +1010,8 @@ int update_sysfs(int cpu)
if (update_hwp_max)
update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max);
hwp_limits_done_via_sysfs = 1;
return 0;
}
@ -1074,21 +1079,21 @@ int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, s
return 0;
}
int update_hwp_request(int cpu)
int update_hwp_request_msr(int cpu)
{
struct msr_hwp_request req;
struct msr_hwp_cap cap;
int msr_offset = MSR_HWP_REQUEST;
read_hwp_request(cpu, &req, msr_offset);
read_hwp_request_msr(cpu, &req, msr_offset);
if (debug)
print_hwp_request(cpu, &req, "old: ");
if (update_hwp_min)
if (update_hwp_min && !hwp_limits_done_via_sysfs)
req.hwp_min = req_update.hwp_min;
if (update_hwp_max)
if (update_hwp_max && !hwp_limits_done_via_sysfs)
req.hwp_max = req_update.hwp_max;
if (update_hwp_desired)
@ -1111,15 +1116,15 @@ int update_hwp_request(int cpu)
verify_hwp_req_self_consistency(cpu, &req);
write_hwp_request(cpu, &req, msr_offset);
write_hwp_request_msr(cpu, &req, msr_offset);
if (debug) {
read_hwp_request(cpu, &req, msr_offset);
read_hwp_request_msr(cpu, &req, msr_offset);
print_hwp_request(cpu, &req, "new: ");
}
return 0;
}
int update_hwp_request_pkg(int pkg)
int update_hwp_request_pkg_msr(int pkg)
{
struct msr_hwp_request req;
struct msr_hwp_cap cap;
@ -1127,7 +1132,7 @@ int update_hwp_request_pkg(int pkg)
int msr_offset = MSR_HWP_REQUEST_PKG;
read_hwp_request(cpu, &req, msr_offset);
read_hwp_request_msr(cpu, &req, msr_offset);
if (debug)
print_hwp_request_pkg(pkg, &req, "old: ");
@ -1155,10 +1160,10 @@ int update_hwp_request_pkg(int pkg)
verify_hwp_req_self_consistency(cpu, &req);
write_hwp_request(cpu, &req, msr_offset);
write_hwp_request_msr(cpu, &req, msr_offset);
if (debug) {
read_hwp_request(cpu, &req, msr_offset);
read_hwp_request_msr(cpu, &req, msr_offset);
print_hwp_request_pkg(pkg, &req, "new: ");
}
return 0;
@ -1166,13 +1171,32 @@ int update_hwp_request_pkg(int pkg)
int enable_hwp_on_cpu(int cpu)
{
unsigned long long msr;
unsigned long long old_msr, new_msr;
get_msr(cpu, MSR_PM_ENABLE, &msr);
put_msr(cpu, MSR_PM_ENABLE, 1);
get_msr(cpu, MSR_PM_ENABLE, &old_msr);
if (old_msr & 1)
return 0; /* already enabled */
new_msr = old_msr | 1;
put_msr(cpu, MSR_PM_ENABLE, new_msr);
if (verbose)
printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1);
printf("cpu%d: MSR_PM_ENABLE old: %llX new: %llX\n", cpu, old_msr, new_msr);
return 0;
}
/*
 * update_cpu_epb_sysfs()
 *
 * Read the current EPB setting of 'cpu' with get_epb_sysfs(), then write the
 * global new_epb with set_epb_sysfs(). When 'verbose' is set, print the old
 * and new values.
 *
 * The return value of set_epb_sysfs() is ignored and the value read is only
 * used for the verbose message; this function always returns 0.
 * NOTE(review): get_epb_sysfs() can apparently return a negative value
 * (print_cpu_msrs() only prints when epb >= 0); that case is not handled here.
 */
int update_cpu_epb_sysfs(int cpu)
{
int epb;
epb = get_epb_sysfs(cpu);
set_epb_sysfs(cpu, new_epb);
if (verbose)
printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
cpu, epb, (unsigned int) new_epb);
return 0;
}
@ -1180,16 +1204,6 @@ int enable_hwp_on_cpu(int cpu)
int update_cpu_msrs(int cpu)
{
unsigned long long msr;
int epb;
if (update_epb) {
epb = get_epb(cpu);
set_epb(cpu, new_epb);
if (verbose)
printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
cpu, epb, (unsigned int) new_epb);
}
if (update_turbo) {
int turbo_is_present_and_disabled;
@ -1224,7 +1238,7 @@ int update_cpu_msrs(int cpu)
if (!hwp_update_enabled())
return 0;
update_hwp_request(cpu);
update_hwp_request_msr(cpu);
return 0;
}
@ -1312,6 +1326,17 @@ void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
func(cpu_num);
}
/*
 * for_all_cpus_in_set_and()
 *
 * Call func() for every cpu number in 0..max_cpu_num that is a member of
 * cpu_set, and AND the results together.
 *
 * The accumulator starts at 1 and is combined with a bitwise '&', so func is
 * expected to return 0 or 1. func is invoked for every member even after the
 * result has become 0 (no short-circuit). Returns 1 if no cpu in range is a
 * member of cpu_set.
 */
int for_all_cpus_in_set_and(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
{
int cpu_num;
int retval = 1;
for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num)
if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
retval &= func(cpu_num);
return retval;
}
void init_data_structures(void)
{
@ -1326,21 +1351,38 @@ void init_data_structures(void)
for_all_proc_cpus(mark_cpu_present);
}
/* clear has_hwp if it is not enable (or being enabled) */
void verify_hwp_is_enabled(void)
/*
 * is_hwp_enabled_on_cpu()
 *
 * Read MSR_PM_ENABLE on cpu_num via get_msr() and return its least-significant
 * bit: 1 if set, 0 otherwise. When 'verbose' is set, report "HWP" or "No-HWP"
 * for the cpu on stderr.
 */
int is_hwp_enabled_on_cpu(int cpu_num)
{
unsigned long long msr;
int retval;
/* MSR_PM_ENABLE bit 0 (mask 0x1) == 1 if HWP is enabled and MSRs visible */
get_msr(cpu_num, MSR_PM_ENABLE, &msr);
retval = (msr & 1);
if (verbose)
fprintf(stderr, "cpu%d: %sHWP\n", cpu_num, retval ? "" : "No-");
return retval;
}
/*
* verify_hwp_is_enabled()
*
* Set has_hwp=0 if there is no HWP feature, or if any CPU in the selected set does not have HWP enabled
*/
void verify_hwp_is_enabled(void)
{
int retval;
if (!has_hwp) /* set in early_cpuid() */
return;
/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
get_msr(base_cpu, MSR_PM_ENABLE, &msr);
if ((msr & 1) == 0) {
retval = for_all_cpus_in_set_and(cpu_setsize, cpu_selected_set, is_hwp_enabled_on_cpu);
if (retval == 0) {
fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n");
has_hwp = 0;
return;
}
}
@ -1551,10 +1593,13 @@ int main(int argc, char **argv)
/* update CPU set */
if (cpu_selected_set) {
if (update_epb)
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_epb_sysfs);
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs);
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs);
} else if (pkg_selected_set)
for_packages(pkg_selected_set, update_hwp_request_pkg);
for_packages(pkg_selected_set, update_hwp_request_pkg_msr);
return 0;
}