ioprio.txt 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. Block io priorities
  2. ===================
  3. Intro
  4. -----
  5. With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
  6. priorities are supported for reads on files. This enables users to io nice
  7. processes or process groups, similar to what has been possible with cpu
  8. scheduling for ages. This document mainly details the current possibilities
  9. with cfq; other io schedulers do not support io priorities thus far.
  10. Scheduling classes
  11. ------------------
  12. CFQ implements three generic scheduling classes that determine how io is
  13. served for a process.
  14. IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
  15. higher priority than any other in the system; processes from this class are
  16. given first access to the disk every time. Thus it needs to be used with some
  17. care: one io RT process can starve the entire system. Within the RT class,
  18. there are 8 levels of class data that determine exactly how much time this
  19. process needs the disk for on each service. In the future this might change
  20. to be more directly mappable to performance, by passing in a wanted data
  21. rate instead.
  22. IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
  23. for any process that hasn't set a specific io priority. The class data
  24. determines how much io bandwidth the process will get; it's directly mappable
  25. to the cpu nice levels, just more coarsely implemented. 0 is the highest
  26. BE prio level, 7 is the lowest. The mapping between cpu nice level and io
  27. nice level is determined as: io_nice = (cpu_nice + 20) / 5.
  28. IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
  29. level only get io time when no one else needs the disk. The idle class has no
  30. class data, since it doesn't really apply here.
  31. Tools
  32. -----
  33. See below for a sample ionice tool. Usage:
  34. # ionice -c<class> -n<level> -p<pid>
  35. If pid isn't given, the current process is assumed. IO priority settings
  36. are inherited on fork, so you can use ionice to start the process at a given
  37. level:
  38. # ionice -c2 -n0 /bin/ls
  39. will run ls at the best-effort scheduling class at the highest priority.
  40. For a running process, you can give the pid instead:
  41. # ionice -c1 -n2 -p100
  42. will change pid 100 to run at the realtime scheduling class, at priority 2.
  43. ---> snip ionice.c tool <---
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <asm/unistd.h>
  51. extern int sys_ioprio_set(int, int, int);
  52. extern int sys_ioprio_get(int, int);
  53. #if defined(__i386__)
  54. #define __NR_ioprio_set 289
  55. #define __NR_ioprio_get 290
  56. #elif defined(__ppc__)
  57. #define __NR_ioprio_set 273
  58. #define __NR_ioprio_get 274
  59. #elif defined(__x86_64__)
  60. #define __NR_ioprio_set 251
  61. #define __NR_ioprio_get 252
  62. #elif defined(__ia64__)
  63. #define __NR_ioprio_set 1274
  64. #define __NR_ioprio_get 1275
  65. #else
  66. #error "Unsupported arch"
  67. #endif
  68. static inline int ioprio_set(int which, int who, int ioprio)
  69. {
  70. return syscall(__NR_ioprio_set, which, who, ioprio);
  71. }
  72. static inline int ioprio_get(int which, int who)
  73. {
  74. return syscall(__NR_ioprio_get, which, who);
  75. }
  76. enum {
  77. IOPRIO_CLASS_NONE,
  78. IOPRIO_CLASS_RT,
  79. IOPRIO_CLASS_BE,
  80. IOPRIO_CLASS_IDLE,
  81. };
  82. enum {
  83. IOPRIO_WHO_PROCESS = 1,
  84. IOPRIO_WHO_PGRP,
  85. IOPRIO_WHO_USER,
  86. };
  87. #define IOPRIO_CLASS_SHIFT 13
  88. const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
  89. int main(int argc, char *argv[])
  90. {
  91. int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
  92. int c, pid = 0;
  93. while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
  94. switch (c) {
  95. case 'n':
  96. ioprio = strtol(optarg, NULL, 10);
  97. set = 1;
  98. break;
  99. case 'c':
  100. ioprio_class = strtol(optarg, NULL, 10);
  101. set = 1;
  102. break;
  103. case 'p':
  104. pid = strtol(optarg, NULL, 10);
  105. break;
  106. }
  107. }
  108. switch (ioprio_class) {
  109. case IOPRIO_CLASS_NONE:
  110. ioprio_class = IOPRIO_CLASS_BE;
  111. break;
  112. case IOPRIO_CLASS_RT:
  113. case IOPRIO_CLASS_BE:
  114. break;
  115. case IOPRIO_CLASS_IDLE:
  116. ioprio = 7;
  117. break;
  118. default:
  119. printf("bad prio class %d\n", ioprio_class);
  120. return 1;
  121. }
  122. if (!set) {
  123. if (!pid && argv[optind])
  124. pid = strtol(argv[optind], NULL, 10);
  125. ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
  126. printf("pid=%d, %d\n", pid, ioprio);
  127. if (ioprio == -1)
  128. perror("ioprio_get");
  129. else {
  130. ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
  131. ioprio = ioprio & 0xff;
  132. printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
  133. }
  134. } else {
  135. if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
  136. perror("ioprio_set");
  137. return 1;
  138. }
  139. if (argv[optind])
  140. execvp(argv[optind], &argv[optind]);
  141. }
  142. return 0;
  143. }
  144. ---> snip ionice.c tool <---
  145. March 11 2005, Jens Axboe <jens.axboe@oracle.com>