summaryrefslogtreecommitdiffstats
path: root/Documentation/block/ioprio.rst
diff options
context:
space:
mode:
authorLinus Torvalds2019-07-16 21:21:41 +0200
committerLinus Torvalds2019-07-16 21:21:41 +0200
commitc309b6f24222246c18a8b65d3950e6e755440865 (patch)
tree11893170f5c246bb0dee8066e85878af04162ab0 /Documentation/block/ioprio.rst
parentMerge tag 'xtensa-20190715' of git://github.com/jcmvbkbc/linux-xtensa (diff)
parentdocs: kbuild: fix build with pdf and fix some minor issues (diff)
downloadkernel-qcow2-linux-c309b6f24222246c18a8b65d3950e6e755440865.tar.gz
kernel-qcow2-linux-c309b6f24222246c18a8b65d3950e6e755440865.tar.xz
kernel-qcow2-linux-c309b6f24222246c18a8b65d3950e6e755440865.zip
Merge tag 'docs/v5.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
Pull rst conversion of docs from Mauro Carvalho Chehab: "As agreed with Jon, I'm sending this big series directly to you, c/c him, as this series required a special care, in order to avoid conflicts with other trees" * tag 'docs/v5.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media: (77 commits) docs: kbuild: fix build with pdf and fix some minor issues docs: block: fix pdf output docs: arm: fix a breakage with pdf output docs: don't use nested tables docs: gpio: add sysfs interface to the admin-guide docs: locking: add it to the main index docs: add some directories to the main documentation index docs: add SPDX tags to new index files docs: add a memory-devices subdir to driver-api docs: phy: place documentation under driver-api docs: serial: move it to the driver-api docs: driver-api: add remaining converted dirs to it docs: driver-api: add xilinx driver API documentation docs: driver-api: add a series of orphaned documents docs: admin-guide: add a series of orphaned documents docs: cgroup-v1: add it to the admin-guide book docs: aoe: add it to the driver-api book docs: add some documentation dirs to the driver-api book docs: driver-model: move it to the driver-api book docs: lp855x-driver.rst: add it to the driver-api book ...
Diffstat (limited to 'Documentation/block/ioprio.rst')
-rw-r--r--Documentation/block/ioprio.rst182
1 files changed, 182 insertions, 0 deletions
diff --git a/Documentation/block/ioprio.rst b/Documentation/block/ioprio.rst
new file mode 100644
index 000000000000..f72b0de65af7
--- /dev/null
+++ b/Documentation/block/ioprio.rst
@@ -0,0 +1,182 @@
+===================
+Block io priorities
+===================
+
+
+Intro
+-----
+
+With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
+priorities are supported for reads on files. This enables users to io nice
+processes or process groups, similar to what has been possible with cpu
+scheduling for ages. This document mainly details the current possibilities
+with cfq; other io schedulers do not support io priorities thus far.
+
+Scheduling classes
+------------------
+
+CFQ implements three generic scheduling classes that determine how io is
+served for a process.
+
+IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
+higher priority than any other in the system, processes from this class are
+given first access to the disk every time. Thus it needs to be used with some
+care, one io RT process can starve the entire system. Within the RT class,
+there are 8 levels of class data that determine exactly how much time this
+process needs the disk for on each service. In the future this might change
+to be more directly mappable to performance, by passing in a wanted data
+rate instead.
+
+IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
+for any process that hasn't set a specific io priority. The class data
+determines how much io bandwidth the process will get, it's directly mappable
+to the cpu nice levels just more coarsely implemented. 0 is the highest
+BE prio level, 7 is the lowest. The mapping between cpu nice level and io
+nice level is determined as: io_nice = (cpu_nice + 20) / 5.
+
+IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
+level only get io time when no one else needs the disk. The idle class has no
+class data, since it doesn't really apply here.
+
+Tools
+-----
+
+See below for a sample ionice tool. Usage::
+
+ # ionice -c<class> -n<level> -p<pid>
+
+If pid isn't given, the current process is assumed. IO priority settings
+are inherited on fork, so you can use ionice to start the process at a given
+level::
+
+ # ionice -c2 -n0 /bin/ls
+
+will run ls at the best-effort scheduling class at the highest priority.
+For a running process, you can give the pid instead::
+
+ # ionice -c1 -n2 -p100
+
+will change pid 100 to run at the realtime scheduling class, at priority 2.
+
+ionice.c tool::
+
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <errno.h>
+ #include <getopt.h>
+ #include <unistd.h>
+ #include <sys/ptrace.h>
+ #include <asm/unistd.h>
+
+ extern int sys_ioprio_set(int, int, int);
+ extern int sys_ioprio_get(int, int);
+
+ #if defined(__i386__)
+ #define __NR_ioprio_set 289
+ #define __NR_ioprio_get 290
+ #elif defined(__ppc__)
+ #define __NR_ioprio_set 273
+ #define __NR_ioprio_get 274
+ #elif defined(__x86_64__)
+ #define __NR_ioprio_set 251
+ #define __NR_ioprio_get 252
+ #elif defined(__ia64__)
+ #define __NR_ioprio_set 1274
+ #define __NR_ioprio_get 1275
+ #else
+ #error "Unsupported arch"
+ #endif
+
+ static inline int ioprio_set(int which, int who, int ioprio)
+ {
+ return syscall(__NR_ioprio_set, which, who, ioprio);
+ }
+
+ static inline int ioprio_get(int which, int who)
+ {
+ return syscall(__NR_ioprio_get, which, who);
+ }
+
+ enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+ };
+
+ enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+ };
+
+ #define IOPRIO_CLASS_SHIFT 13
+
+ const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
+
+ int main(int argc, char *argv[])
+ {
+ int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
+ int c, pid = 0;
+
+ while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
+ switch (c) {
+ case 'n':
+ ioprio = strtol(optarg, NULL, 10);
+ set = 1;
+ break;
+ case 'c':
+ ioprio_class = strtol(optarg, NULL, 10);
+ set = 1;
+ break;
+ case 'p':
+ pid = strtol(optarg, NULL, 10);
+ break;
+ }
+ }
+
+ switch (ioprio_class) {
+ case IOPRIO_CLASS_NONE:
+ ioprio_class = IOPRIO_CLASS_BE;
+ break;
+ case IOPRIO_CLASS_RT:
+ case IOPRIO_CLASS_BE:
+ break;
+ case IOPRIO_CLASS_IDLE:
+ ioprio = 7;
+ break;
+ default:
+ printf("bad prio class %d\n", ioprio_class);
+ return 1;
+ }
+
+ if (!set) {
+ if (!pid && argv[optind])
+ pid = strtol(argv[optind], NULL, 10);
+
+ ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
+
+ printf("pid=%d, %d\n", pid, ioprio);
+
+ if (ioprio == -1)
+ perror("ioprio_get");
+ else {
+ ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
+ ioprio = ioprio & 0xff;
+ printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
+ }
+ } else {
+ if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
+ perror("ioprio_set");
+ return 1;
+ }
+
+ if (argv[optind])
+ execvp(argv[optind], &argv[optind]);
+ }
+
+ return 0;
+ }
+
+
+March 11 2005, Jens Axboe <jens.axboe@oracle.com>