bfq-wf2q.c 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699
  1. /*
  2. * Hierarchical Budget Worst-case Fair Weighted Fair Queueing
  3. * (B-WF2Q+): hierarchical scheduling algorithm by which the BFQ I/O
  4. * scheduler schedules generic entities. The latter can represent
  5. * either single bfq queues (associated with processes) or groups of
  6. * bfq queues (associated with cgroups).
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License as
  10. * published by the Free Software Foundation; either version 2 of the
  11. * License, or (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * General Public License for more details.
  17. */
  18. #include "bfq-iosched.h"
  19. /**
  20. * bfq_gt - compare two timestamps.
  21. * @a: first ts.
  22. * @b: second ts.
  23. *
  24. * Return @a > @b, dealing with wrapping correctly.
  25. */
  26. static int bfq_gt(u64 a, u64 b)
  27. {
  28. return (s64)(a - b) > 0;
  29. }
  30. static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
  31. {
  32. struct rb_node *node = tree->rb_node;
  33. return rb_entry(node, struct bfq_entity, rb_node);
  34. }
  35. static unsigned int bfq_class_idx(struct bfq_entity *entity)
  36. {
  37. struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
  38. return bfqq ? bfqq->ioprio_class - 1 :
  39. BFQ_DEFAULT_GRP_CLASS - 1;
  40. }
  41. static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
  42. bool expiration);
  43. static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);
  44. /**
  45. * bfq_update_next_in_service - update sd->next_in_service
  46. * @sd: sched_data for which to perform the update.
  47. * @new_entity: if not NULL, pointer to the entity whose activation,
* requeueing or repositioning triggered the invocation of
  49. * this function.
* @expiration: if true, this function is being invoked after the
  51. * expiration of the in-service entity
  52. *
  53. * This function is called to update sd->next_in_service, which, in
  54. * its turn, may change as a consequence of the insertion or
  55. * extraction of an entity into/from one of the active trees of
  56. * sd. These insertions/extractions occur as a consequence of
  57. * activations/deactivations of entities, with some activations being
  58. * 'true' activations, and other activations being requeueings (i.e.,
  59. * implementing the second, requeueing phase of the mechanism used to
  60. * reposition an entity in its active tree; see comments on
  61. * __bfq_activate_entity and __bfq_requeue_entity for details). In
  62. * both the last two activation sub-cases, new_entity points to the
  63. * just activated or requeued entity.
  64. *
  65. * Returns true if sd->next_in_service changes in such a way that
  66. * entity->parent may become the next_in_service for its parent
  67. * entity.
  68. */
  69. static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
  70. struct bfq_entity *new_entity,
  71. bool expiration)
  72. {
  73. struct bfq_entity *next_in_service = sd->next_in_service;
  74. bool parent_sched_may_change = false;
  75. bool change_without_lookup = false;
  76. /*
  77. * If this update is triggered by the activation, requeueing
  78. * or repositiong of an entity that does not coincide with
  79. * sd->next_in_service, then a full lookup in the active tree
  80. * can be avoided. In fact, it is enough to check whether the
  81. * just-modified entity has the same priority as
  82. * sd->next_in_service, is eligible and has a lower virtual
  83. * finish time than sd->next_in_service. If this compound
  84. * condition holds, then the new entity becomes the new
  85. * next_in_service. Otherwise no change is needed.
  86. */
  87. if (new_entity && new_entity != sd->next_in_service) {
  88. /*
  89. * Flag used to decide whether to replace
  90. * sd->next_in_service with new_entity. Tentatively
  91. * set to true, and left as true if
  92. * sd->next_in_service is NULL.
  93. */
  94. change_without_lookup = true;
  95. /*
  96. * If there is already a next_in_service candidate
  97. * entity, then compare timestamps to decide whether
  98. * to replace sd->service_tree with new_entity.
  99. */
  100. if (next_in_service) {
  101. unsigned int new_entity_class_idx =
  102. bfq_class_idx(new_entity);
  103. struct bfq_service_tree *st =
  104. sd->service_tree + new_entity_class_idx;
  105. change_without_lookup =
  106. (new_entity_class_idx ==
  107. bfq_class_idx(next_in_service)
  108. &&
  109. !bfq_gt(new_entity->start, st->vtime)
  110. &&
  111. bfq_gt(next_in_service->finish,
  112. new_entity->finish));
  113. }
  114. if (change_without_lookup)
  115. next_in_service = new_entity;
  116. }
  117. if (!change_without_lookup) /* lookup needed */
  118. next_in_service = bfq_lookup_next_entity(sd, expiration);
  119. if (next_in_service)
  120. parent_sched_may_change = !sd->next_in_service ||
  121. bfq_update_parent_budget(next_in_service);
  122. sd->next_in_service = next_in_service;
  123. if (!next_in_service)
  124. return parent_sched_may_change;
  125. return parent_sched_may_change;
  126. }
  127. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  128. struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
  129. {
  130. struct bfq_entity *group_entity = bfqq->entity.parent;
  131. if (!group_entity)
  132. group_entity = &bfqq->bfqd->root_group->entity;
  133. return container_of(group_entity, struct bfq_group, entity);
  134. }
  135. /*
  136. * Returns true if this budget changes may let next_in_service->parent
  137. * become the next_in_service entity for its parent entity.
  138. */
  139. static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
  140. {
  141. struct bfq_entity *bfqg_entity;
  142. struct bfq_group *bfqg;
  143. struct bfq_sched_data *group_sd;
  144. bool ret = false;
  145. group_sd = next_in_service->sched_data;
  146. bfqg = container_of(group_sd, struct bfq_group, sched_data);
  147. /*
  148. * bfq_group's my_entity field is not NULL only if the group
  149. * is not the root group. We must not touch the root entity
  150. * as it must never become an in-service entity.
  151. */
  152. bfqg_entity = bfqg->my_entity;
  153. if (bfqg_entity) {
  154. if (bfqg_entity->budget > next_in_service->budget)
  155. ret = true;
  156. bfqg_entity->budget = next_in_service->budget;
  157. }
  158. return ret;
  159. }
  160. /*
  161. * This function tells whether entity stops being a candidate for next
  162. * service, according to the restrictive definition of the field
  163. * next_in_service. In particular, this function is invoked for an
  164. * entity that is about to be set in service.
  165. *
  166. * If entity is a queue, then the entity is no longer a candidate for
* next service according to that definition, because entity is
  168. * about to become the in-service queue. This function then returns
  169. * true if entity is a queue.
  170. *
  171. * In contrast, entity could still be a candidate for next service if
  172. * it is not a queue, and has more than one active child. In fact,
  173. * even if one of its children is about to be set in service, other
  174. * active children may still be the next to serve, for the parent
  175. * entity, even according to the above definition. As a consequence, a
  176. * non-queue entity is not a candidate for next-service only if it has
  177. * only one active child. And only if this condition holds, then this
  178. * function returns true for a non-queue entity.
  179. */
  180. static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
  181. {
  182. struct bfq_group *bfqg;
  183. if (bfq_entity_to_bfqq(entity))
  184. return true;
  185. bfqg = container_of(entity, struct bfq_group, entity);
  186. /*
  187. * The field active_entities does not always contain the
  188. * actual number of active children entities: it happens to
  189. * not account for the in-service entity in case the latter is
  190. * removed from its active tree (which may get done after
  191. * invoking the function bfq_no_longer_next_in_service in
  192. * bfq_get_next_queue). Fortunately, here, i.e., while
  193. * bfq_no_longer_next_in_service is not yet completed in
  194. * bfq_get_next_queue, bfq_active_extract has not yet been
  195. * invoked, and thus active_entities still coincides with the
  196. * actual number of active entities.
  197. */
  198. if (bfqg->active_entities == 1)
  199. return true;
  200. return false;
  201. }
  202. #else /* CONFIG_BFQ_GROUP_IOSCHED */
/* Without cgroup support, every queue belongs to the single root group. */
struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
{
	return bfqq->bfqd->root_group;
}
/*
 * Without cgroup support there is no parent group entity whose budget
 * could change, so the update can never propagate upwards.
 */
static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
{
	return false;
}
/*
 * Without cgroup support every entity is a queue, and a queue about to
 * be set in service always stops being a candidate for next service.
 */
static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
{
	return true;
}
  215. #endif /* CONFIG_BFQ_GROUP_IOSCHED */
  216. /*
  217. * Shift for timestamp calculations. This actually limits the maximum
  218. * service allowed in one timestamp delta (small shift values increase it),
  219. * the maximum total weight that can be used for the queues in the system
  220. * (big shift values increase it), and the period of virtual time
  221. * wraparounds.
  222. */
  223. #define WFQ_SERVICE_SHIFT 22
  224. struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
  225. {
  226. struct bfq_queue *bfqq = NULL;
  227. if (!entity->my_sched_data)
  228. bfqq = container_of(entity, struct bfq_queue, entity);
  229. return bfqq;
  230. }
  231. /**
  232. * bfq_delta - map service into the virtual time domain.
  233. * @service: amount of service.
  234. * @weight: scale factor (weight of an entity or weight sum).
  235. */
  236. static u64 bfq_delta(unsigned long service, unsigned long weight)
  237. {
  238. u64 d = (u64)service << WFQ_SERVICE_SHIFT;
  239. do_div(d, weight);
  240. return d;
  241. }
  242. /**
  243. * bfq_calc_finish - assign the finish time to an entity.
  244. * @entity: the entity to act upon.
  245. * @service: the service to be charged to the entity.
  246. */
  247. static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
  248. {
  249. struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
  250. entity->finish = entity->start +
  251. bfq_delta(service, entity->weight);
  252. if (bfqq) {
  253. bfq_log_bfqq(bfqq->bfqd, bfqq,
  254. "calc_finish: serv %lu, w %d",
  255. service, entity->weight);
  256. bfq_log_bfqq(bfqq->bfqd, bfqq,
  257. "calc_finish: start %llu, finish %llu, delta %llu",
  258. entity->start, entity->finish,
  259. bfq_delta(service, entity->weight));
  260. }
  261. }
  262. /**
  263. * bfq_entity_of - get an entity from a node.
  264. * @node: the node field of the entity.
  265. *
  266. * Convert a node pointer to the relative entity. This is used only
  267. * to simplify the logic of some functions and not as the generic
  268. * conversion mechanism because, e.g., in the tree walking functions,
  269. * the check for a %NULL value would be redundant.
  270. */
  271. struct bfq_entity *bfq_entity_of(struct rb_node *node)
  272. {
  273. struct bfq_entity *entity = NULL;
  274. if (node)
  275. entity = rb_entry(node, struct bfq_entity, rb_node);
  276. return entity;
  277. }
  278. /**
  279. * bfq_extract - remove an entity from a tree.
  280. * @root: the tree root.
  281. * @entity: the entity to remove.
  282. */
  283. static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
  284. {
  285. entity->tree = NULL;
  286. rb_erase(&entity->rb_node, root);
  287. }
  288. /**
  289. * bfq_idle_extract - extract an entity from the idle tree.
  290. * @st: the service tree of the owning @entity.
  291. * @entity: the entity being removed.
  292. */
  293. static void bfq_idle_extract(struct bfq_service_tree *st,
  294. struct bfq_entity *entity)
  295. {
  296. struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
  297. struct rb_node *next;
  298. if (entity == st->first_idle) {
  299. next = rb_next(&entity->rb_node);
  300. st->first_idle = bfq_entity_of(next);
  301. }
  302. if (entity == st->last_idle) {
  303. next = rb_prev(&entity->rb_node);
  304. st->last_idle = bfq_entity_of(next);
  305. }
  306. bfq_extract(&st->idle, entity);
  307. if (bfqq)
  308. list_del(&bfqq->bfqq_list);
  309. }
  310. /**
  311. * bfq_insert - generic tree insertion.
  312. * @root: tree root.
  313. * @entity: entity to insert.
  314. *
  315. * This is used for the idle and the active tree, since they are both
  316. * ordered by finish time.
  317. */
  318. static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
  319. {
  320. struct bfq_entity *entry;
  321. struct rb_node **node = &root->rb_node;
  322. struct rb_node *parent = NULL;
  323. while (*node) {
  324. parent = *node;
  325. entry = rb_entry(parent, struct bfq_entity, rb_node);
  326. if (bfq_gt(entry->finish, entity->finish))
  327. node = &parent->rb_left;
  328. else
  329. node = &parent->rb_right;
  330. }
  331. rb_link_node(&entity->rb_node, parent, node);
  332. rb_insert_color(&entity->rb_node, root);
  333. entity->tree = root;
  334. }
  335. /**
  336. * bfq_update_min - update the min_start field of a entity.
  337. * @entity: the entity to update.
  338. * @node: one of its children.
  339. *
  340. * This function is called when @entity may store an invalid value for
  341. * min_start due to updates to the active tree. The function assumes
  342. * that the subtree rooted at @node (which may be its left or its right
  343. * child) has a valid min_start value.
  344. */
  345. static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
  346. {
  347. struct bfq_entity *child;
  348. if (node) {
  349. child = rb_entry(node, struct bfq_entity, rb_node);
  350. if (bfq_gt(entity->min_start, child->min_start))
  351. entity->min_start = child->min_start;
  352. }
  353. }
  354. /**
  355. * bfq_update_active_node - recalculate min_start.
  356. * @node: the node to update.
  357. *
  358. * @node may have changed position or one of its children may have moved,
  359. * this function updates its min_start value. The left and right subtrees
  360. * are assumed to hold a correct min_start value.
  361. */
  362. static void bfq_update_active_node(struct rb_node *node)
  363. {
  364. struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
  365. entity->min_start = entity->start;
  366. bfq_update_min(entity, node->rb_right);
  367. bfq_update_min(entity, node->rb_left);
  368. }
  369. /**
  370. * bfq_update_active_tree - update min_start for the whole active tree.
  371. * @node: the starting node.
  372. *
  373. * @node must be the deepest modified node after an update. This function
  374. * updates its min_start using the values held by its children, assuming
  375. * that they did not change, and then updates all the nodes that may have
  376. * changed in the path to the root. The only nodes that may have changed
  377. * are the ones in the path or their siblings.
  378. */
  379. static void bfq_update_active_tree(struct rb_node *node)
  380. {
  381. struct rb_node *parent;
  382. up:
  383. bfq_update_active_node(node);
  384. parent = rb_parent(node);
  385. if (!parent)
  386. return;
  387. if (node == parent->rb_left && parent->rb_right)
  388. bfq_update_active_node(parent->rb_right);
  389. else if (parent->rb_left)
  390. bfq_update_active_node(parent->rb_left);
  391. node = parent;
  392. goto up;
  393. }
  394. /**
  395. * bfq_active_insert - insert an entity in the active tree of its
  396. * group/device.
  397. * @st: the service tree of the entity.
  398. * @entity: the entity being inserted.
  399. *
  400. * The active tree is ordered by finish time, but an extra key is kept
  401. * per each node, containing the minimum value for the start times of
  402. * its children (and the node itself), so it's possible to search for
  403. * the eligible node with the lowest finish time in logarithmic time.
  404. */
static void bfq_active_insert(struct bfq_service_tree *st,
			      struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node = &entity->rb_node;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	struct bfq_sched_data *sd = NULL;
	struct bfq_group *bfqg = NULL;
	struct bfq_data *bfqd = NULL;
#endif
	bfq_insert(&st->active, entity);
	/*
	 * Start the min_start repair from a child of the inserted node
	 * when one exists — presumably the deepest node whose subtree
	 * keys may have changed after the rebalancing rotations.
	 * NOTE(review): relies on bfq_update_active_tree fixing the
	 * whole path to the root — confirm against rb-tree semantics.
	 */
	if (node->rb_left)
		node = node->rb_left;
	else if (node->rb_right)
		node = node->rb_right;
	bfq_update_active_tree(node);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	sd = entity->sched_data;
	bfqg = container_of(sd, struct bfq_group, sched_data);
	bfqd = (struct bfq_data *)bfqg->bfqd;
#endif
	/* Queues go on the device's active list; groups go in the
	 * group-weights tree instead. */
	if (bfqq)
		list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	else /* bfq_group */
		bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
	/* The root group's own activity is not counted. */
	if (bfqg != bfqd->root_group)
		bfqg->active_entities++;
#endif
}
  435. /**
  436. * bfq_ioprio_to_weight - calc a weight from an ioprio.
  437. * @ioprio: the ioprio value to convert.
  438. */
  439. unsigned short bfq_ioprio_to_weight(int ioprio)
  440. {
  441. return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
  442. }
  443. /**
  444. * bfq_weight_to_ioprio - calc an ioprio from a weight.
  445. * @weight: the weight value to convert.
  446. *
  447. * To preserve as much as possible the old only-ioprio user interface,
  448. * 0 is used as an escape ioprio value for weights (numerically) equal or
  449. * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
  450. */
  451. static unsigned short bfq_weight_to_ioprio(int weight)
  452. {
  453. return max_t(int, 0,
  454. IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight);
  455. }
  456. static void bfq_get_entity(struct bfq_entity *entity)
  457. {
  458. struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
  459. if (bfqq) {
  460. bfqq->ref++;
  461. bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
  462. bfqq, bfqq->ref);
  463. }
  464. }
  465. /**
  466. * bfq_find_deepest - find the deepest node that an extraction can modify.
  467. * @node: the node being removed.
  468. *
  469. * Do the first step of an extraction in an rb tree, looking for the
  470. * node that will replace @node, and returning the deepest node that
  471. * the following modifications to the tree can touch. If @node is the
  472. * last node in the tree return %NULL.
  473. */
  474. static struct rb_node *bfq_find_deepest(struct rb_node *node)
  475. {
  476. struct rb_node *deepest;
  477. if (!node->rb_right && !node->rb_left)
  478. deepest = rb_parent(node);
  479. else if (!node->rb_right)
  480. deepest = node->rb_left;
  481. else if (!node->rb_left)
  482. deepest = node->rb_right;
  483. else {
  484. deepest = rb_next(node);
  485. if (deepest->rb_right)
  486. deepest = deepest->rb_right;
  487. else if (rb_parent(deepest) != node)
  488. deepest = rb_parent(deepest);
  489. }
  490. return deepest;
  491. }
  492. /**
  493. * bfq_active_extract - remove an entity from the active tree.
  494. * @st: the service_tree containing the tree.
  495. * @entity: the entity being removed.
  496. */
static void bfq_active_extract(struct bfq_service_tree *st,
			       struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	struct bfq_sched_data *sd = NULL;
	struct bfq_group *bfqg = NULL;
	struct bfq_data *bfqd = NULL;
#endif
	/*
	 * Find the deepest node the removal can touch before erasing,
	 * then repair min_start on the path from there to the root.
	 * node is NULL when @entity was the last node in the tree.
	 */
	node = bfq_find_deepest(&entity->rb_node);
	bfq_extract(&st->active, entity);
	if (node)
		bfq_update_active_tree(node);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	sd = entity->sched_data;
	bfqg = container_of(sd, struct bfq_group, sched_data);
	bfqd = (struct bfq_data *)bfqg->bfqd;
#endif
	/* Mirror of bfq_active_insert: unlink queue / unaccount group. */
	if (bfqq)
		list_del(&bfqq->bfqq_list);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	else /* bfq_group */
		bfq_weights_tree_remove(bfqd, entity,
					&bfqd->group_weights_tree);
	/* The root group's own activity is not counted. */
	if (bfqg != bfqd->root_group)
		bfqg->active_entities--;
#endif
}
  526. /**
  527. * bfq_idle_insert - insert an entity into the idle tree.
  528. * @st: the service tree containing the tree.
  529. * @entity: the entity to insert.
  530. */
  531. static void bfq_idle_insert(struct bfq_service_tree *st,
  532. struct bfq_entity *entity)
  533. {
  534. struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
  535. struct bfq_entity *first_idle = st->first_idle;
  536. struct bfq_entity *last_idle = st->last_idle;
  537. if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
  538. st->first_idle = entity;
  539. if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
  540. st->last_idle = entity;
  541. bfq_insert(&st->idle, entity);
  542. if (bfqq)
  543. list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
  544. }
  545. /**
  546. * bfq_forget_entity - do not consider entity any longer for scheduling
  547. * @st: the service tree.
  548. * @entity: the entity being removed.
  549. * @is_in_service: true if entity is currently the in-service entity.
  550. *
  551. * Forget everything about @entity. In addition, if entity represents
  552. * a queue, and the latter is not in service, then release the service
  553. * reference to the queue (the one taken through bfq_get_entity). In
  554. * fact, in this case, there is really no more service reference to
  555. * the queue, as the latter is also outside any service tree. If,
  556. * instead, the queue is in service, then __bfq_bfqd_reset_in_service
  557. * will take care of putting the reference when the queue finally
  558. * stops being served.
  559. */
  560. static void bfq_forget_entity(struct bfq_service_tree *st,
  561. struct bfq_entity *entity,
  562. bool is_in_service)
  563. {
  564. struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
  565. entity->on_st = false;
  566. st->wsum -= entity->weight;
  567. if (bfqq && !is_in_service)
  568. bfq_put_queue(bfqq);
  569. }
  570. /**
  571. * bfq_put_idle_entity - release the idle tree ref of an entity.
  572. * @st: service tree for the entity.
  573. * @entity: the entity being released.
  574. */
  575. void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity)
  576. {
  577. bfq_idle_extract(st, entity);
  578. bfq_forget_entity(st, entity,
  579. entity == entity->sched_data->in_service_entity);
  580. }
  581. /**
  582. * bfq_forget_idle - update the idle tree if necessary.
  583. * @st: the service tree to act upon.
  584. *
  585. * To preserve the global O(log N) complexity we only remove one entry here;
  586. * as the idle tree will not grow indefinitely this can be done safely.
  587. */
  588. static void bfq_forget_idle(struct bfq_service_tree *st)
  589. {
  590. struct bfq_entity *first_idle = st->first_idle;
  591. struct bfq_entity *last_idle = st->last_idle;
  592. if (RB_EMPTY_ROOT(&st->active) && last_idle &&
  593. !bfq_gt(last_idle->finish, st->vtime)) {
  594. /*
  595. * Forget the whole idle tree, increasing the vtime past
  596. * the last finish time of idle entities.
  597. */
  598. st->vtime = last_idle->finish;
  599. }
  600. if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
  601. bfq_put_idle_entity(st, first_idle);
  602. }
  603. struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
  604. {
  605. struct bfq_sched_data *sched_data = entity->sched_data;
  606. unsigned int idx = bfq_class_idx(entity);
  607. return sched_data->service_tree + idx;
  608. }
  609. /*
  610. * Update weight and priority of entity. If update_class_too is true,
  611. * then update the ioprio_class of entity too.
  612. *
  613. * The reason why the update of ioprio_class is controlled through the
  614. * last parameter is as follows. Changing the ioprio class of an
  615. * entity implies changing the destination service trees for that
  616. * entity. If such a change occurred when the entity is already on one
  617. * of the service trees for its previous class, then the state of the
  618. * entity would become more complex: none of the new possible service
  619. * trees for the entity, according to bfq_entity_service_tree(), would
  620. * match any of the possible service trees on which the entity
  621. * is. Complex operations involving these trees, such as entity
  622. * activations and deactivations, should take into account this
  623. * additional complexity. To avoid this issue, this function is
  624. * invoked with update_class_too unset in the points in the code where
  625. * entity may happen to be on some tree.
  626. */
struct bfq_service_tree *
__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
                                struct bfq_entity *entity,
                                bool update_class_too)
{
        struct bfq_service_tree *new_st = old_st;

        if (entity->prio_changed) {
                struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
                unsigned int prev_weight, new_weight;
                struct bfq_data *bfqd = NULL;
                struct rb_root *root;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
                struct bfq_sched_data *sd;
                struct bfq_group *bfqg;
#endif

                /*
                 * Recover the bfq_data this entity belongs to: directly
                 * from the queue, or, for a group entity, from the group
                 * that embeds its sched_data.
                 */
                if (bfqq)
                        bfqd = bfqq->bfqd;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
                else {
                        sd = entity->my_sched_data;
                        bfqg = container_of(sd, struct bfq_group, sched_data);
                        bfqd = (struct bfq_data *)bfqg->bfqd;
                }
#endif

                /* Remove the old contribution before any weight change. */
                old_st->wsum -= entity->weight;

                if (entity->new_weight != entity->orig_weight) {
                        /* Clamp out-of-range weights, loudly. */
                        if (entity->new_weight < BFQ_MIN_WEIGHT ||
                            entity->new_weight > BFQ_MAX_WEIGHT) {
                                pr_crit("update_weight_prio: new_weight %d\n",
                                        entity->new_weight);
                                if (entity->new_weight < BFQ_MIN_WEIGHT)
                                        entity->new_weight = BFQ_MIN_WEIGHT;
                                else
                                        entity->new_weight = BFQ_MAX_WEIGHT;
                        }
                        entity->orig_weight = entity->new_weight;
                        if (bfqq)
                                bfqq->ioprio =
                                  bfq_weight_to_ioprio(entity->orig_weight);
                }

                if (bfqq && update_class_too)
                        bfqq->ioprio_class = bfqq->new_ioprio_class;

                /*
                 * Reset prio_changed only if the ioprio_class change
                 * is not pending any longer.
                 */
                if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
                        entity->prio_changed = 0;

                /*
                 * NOTE: here we may be changing the weight too early,
                 * this will cause unfairness. The correct approach
                 * would have required additional complexity to defer
                 * weight changes to the proper time instants (i.e.,
                 * when entity->finish <= old_st->vtime).
                 */
                new_st = bfq_entity_service_tree(entity);

                prev_weight = entity->weight;
                /* The effective weight accounts for weight raising. */
                new_weight = entity->orig_weight *
                             (bfqq ? bfqq->wr_coeff : 1);
                /*
                 * If the weight of the entity changes, remove the entity
                 * from its old weight counter (if there is a counter
                 * associated with the entity), and add it to the counter
                 * associated with its new weight.
                 */
                if (prev_weight != new_weight) {
                        root = bfqq ? &bfqd->queue_weights_tree :
                                      &bfqd->group_weights_tree;
                        bfq_weights_tree_remove(bfqd, entity, root);
                }
                entity->weight = new_weight;
                /*
                 * Add the entity to its weights tree only if it is
                 * not associated with a weight-raised queue.
                 */
                if (prev_weight != new_weight &&
                    (bfqq ? bfqq->wr_coeff == 1 : 1))
                        /* If we get here, root has been initialized. */
                        bfq_weights_tree_add(bfqd, entity, root);

                new_st->wsum += entity->weight;

                /* Moving to a new tree: restart from that tree's vtime. */
                if (new_st != old_st)
                        entity->start = new_st->vtime;
        }

        return new_st;
}
  712. /**
  713. * bfq_bfqq_served - update the scheduler status after selection for
  714. * service.
  715. * @bfqq: the queue being served.
  716. * @served: bytes to transfer.
  717. *
  718. * NOTE: this can be optimized, as the timestamps of upper level entities
  719. * are synchronized every time a new bfqq is selected for service. By now,
  720. * we keep it to better check consistency.
  721. */
  722. void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
  723. {
  724. struct bfq_entity *entity = &bfqq->entity;
  725. struct bfq_service_tree *st;
  726. for_each_entity(entity) {
  727. st = bfq_entity_service_tree(entity);
  728. entity->service += served;
  729. st->vtime += bfq_delta(served, st->wsum);
  730. bfq_forget_idle(st);
  731. }
  732. bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
  733. bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
  734. }
  735. /**
  736. * bfq_bfqq_charge_time - charge an amount of service equivalent to the length
  737. * of the time interval during which bfqq has been in
  738. * service.
  739. * @bfqd: the device
  740. * @bfqq: the queue that needs a service update.
  741. * @time_ms: the amount of time during which the queue has received service
  742. *
  743. * If a queue does not consume its budget fast enough, then providing
  744. * the queue with service fairness may impair throughput, more or less
  745. * severely. For this reason, queues that consume their budget slowly
  746. * are provided with time fairness instead of service fairness. This
  747. * goal is achieved through the BFQ scheduling engine, even if such an
  748. * engine works in the service, and not in the time domain. The trick
  749. * is charging these queues with an inflated amount of service, equal
  750. * to the amount of service that they would have received during their
  751. * service slot if they had been fast, i.e., if their requests had
  752. * been dispatched at a rate equal to the estimated peak rate.
  753. *
  754. * It is worth noting that time fairness can cause important
  755. * distortions in terms of bandwidth distribution, on devices with
  756. * internal queueing. The reason is that I/O requests dispatched
  757. * during the service slot of a queue may be served after that service
  758. * slot is finished, and may have a total processing time loosely
  759. * correlated with the duration of the service slot. This is
  760. * especially true for short service slots.
  761. */
  762. void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  763. unsigned long time_ms)
  764. {
  765. struct bfq_entity *entity = &bfqq->entity;
  766. int tot_serv_to_charge = entity->service;
  767. unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);
  768. if (time_ms > 0 && time_ms < timeout_ms)
  769. tot_serv_to_charge =
  770. (bfqd->bfq_max_budget * time_ms) / timeout_ms;
  771. if (tot_serv_to_charge < entity->service)
  772. tot_serv_to_charge = entity->service;
  773. /* Increase budget to avoid inconsistencies */
  774. if (tot_serv_to_charge > entity->budget)
  775. entity->budget = tot_serv_to_charge;
  776. bfq_bfqq_served(bfqq,
  777. max_t(int, 0, tot_serv_to_charge - entity->service));
  778. }
/*
 * Update the weight/priority of entity if needed, compute its new
 * finish timestamp, possibly push backshifted timestamps forward, and
 * finally insert entity into the active tree of its service tree.
 */
static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
                                        struct bfq_service_tree *st,
                                        bool backshifted)
{
        struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

        /*
         * When this function is invoked, entity is not in any service
         * tree, then it is safe to invoke next function with the last
         * parameter set (see the comments on the function).
         */
        st = __bfq_entity_update_weight_prio(st, entity, true);
        bfq_calc_finish(entity, entity->budget);

        /*
         * If some queues enjoy backshifting for a while, then their
         * (virtual) finish timestamps may happen to become lower and
         * lower than the system virtual time. In particular, if
         * these queues often happen to be idle for short time
         * periods, and during such time periods other queues with
         * higher timestamps happen to be busy, then the backshifted
         * timestamps of the former queues can become much lower than
         * the system virtual time. In fact, to serve the queues with
         * higher timestamps while the ones with lower timestamps are
         * idle, the system virtual time may be pushed-up to much
         * higher values than the finish timestamps of the idle
         * queues. As a consequence, the finish timestamps of all new
         * or newly activated queues may end up being much larger than
         * those of lucky queues with backshifted timestamps. The
         * latter queues may then monopolize the device for a lot of
         * time. This would simply break service guarantees.
         *
         * To reduce this problem, push up a little bit the
         * backshifted timestamps of the queue associated with this
         * entity (only a queue can happen to have the backshifted
         * flag set): just enough to let the finish timestamp of the
         * queue be equal to the current value of the system virtual
         * time. This may introduce a little unfairness among queues
         * with backshifted timestamps, but it does not break
         * worst-case fairness guarantees.
         *
         * As a special case, if bfqq is weight-raised, push up
         * timestamps much less, to keep very low the probability that
         * this push up causes the backshifted finish timestamps of
         * weight-raised queues to become higher than the backshifted
         * finish timestamps of non weight-raised queues.
         */
        if (backshifted && bfq_gt(st->vtime, entity->finish)) {
                unsigned long delta = st->vtime - entity->finish;

                /* Weight-raised queues get a wr_coeff-times smaller push. */
                if (bfqq)
                        delta /= bfqq->wr_coeff;

                entity->start += delta;
                entity->finish += delta;
        }

        bfq_active_insert(st, entity);
}
/**
 * __bfq_activate_entity - handle activation of entity.
 * @entity: the entity being activated.
 * @non_blocking_wait_rq: true if entity was waiting for a request
 *
 * Called for a 'true' activation, i.e., if entity is not active and
 * one of its children receives a new request.
 *
 * Basically, this function updates the timestamps of entity and
 * inserts entity into its active tree, after possibly extracting it
 * from its idle tree.
 */
static void __bfq_activate_entity(struct bfq_entity *entity,
                                  bool non_blocking_wait_rq)
{
        struct bfq_service_tree *st = bfq_entity_service_tree(entity);
        bool backshifted = false;
        unsigned long long min_vstart;

        /* See comments on bfq_fqq_update_budg_for_activation */
        if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
                /* Keep the old (backshifted) finish time as the new start. */
                backshifted = true;
                min_vstart = entity->finish;
        } else
                min_vstart = st->vtime;

        if (entity->tree == &st->idle) {
                /*
                 * Must be on the idle tree, bfq_idle_extract() will
                 * check for that.
                 */
                bfq_idle_extract(st, entity);
                entity->start = bfq_gt(min_vstart, entity->finish) ?
                        min_vstart : entity->finish;
        } else {
                /*
                 * The finish time of the entity may be invalid, and
                 * it is in the past for sure, otherwise the queue
                 * would have been on the idle tree.
                 */
                entity->start = min_vstart;
                st->wsum += entity->weight;
                /*
                 * entity is about to be inserted into a service tree,
                 * and then set in service: get a reference to make
                 * sure entity does not disappear until it is no
                 * longer in service or scheduled for service.
                 */
                bfq_get_entity(entity);

                entity->on_st = true;
        }

        bfq_update_fin_time_enqueue(entity, st, backshifted);
}
/**
 * __bfq_requeue_entity - handle requeueing or repositioning of an entity.
 * @entity: the entity being requeued or repositioned.
 *
 * Requeueing is needed if this entity stops being served, which
 * happens if a leaf descendant entity has expired. On the other hand,
 * repositioning is needed if the next_inservice_entity for the child
 * entity has changed. See the comments inside the function for
 * details.
 *
 * Basically, this function: 1) removes entity from its active tree if
 * present there, 2) updates the timestamps of entity and 3) inserts
 * entity back into its active tree (in the new, right position for
 * the new values of the timestamps).
 */
static void __bfq_requeue_entity(struct bfq_entity *entity)
{
        struct bfq_sched_data *sd = entity->sched_data;
        struct bfq_service_tree *st = bfq_entity_service_tree(entity);

        if (entity == sd->in_service_entity) {
                /*
                 * We are requeueing the current in-service entity,
                 * which may have to be done for one of the following
                 * reasons:
                 * - entity represents the in-service queue, and the
                 *   in-service queue is being requeued after an
                 *   expiration;
                 * - entity represents a group, and its budget has
                 *   changed because one of its child entities has
                 *   just been either activated or requeued for some
                 *   reason; the timestamps of the entity need then to
                 *   be updated, and the entity needs to be enqueued
                 *   or repositioned accordingly.
                 *
                 * In particular, before requeueing, the start time of
                 * the entity must be moved forward to account for the
                 * service that the entity has received while in
                 * service. This is done by the next instructions. The
                 * finish time will then be updated according to this
                 * new value of the start time, and to the budget of
                 * the entity.
                 */
                bfq_calc_finish(entity, entity->service);
                entity->start = entity->finish;
                /*
                 * In addition, if the entity had more than one child
                 * when set in service, then it was not extracted from
                 * the active tree. This implies that the position of
                 * the entity in the active tree may need to be
                 * changed now, because we have just updated the start
                 * time of the entity, and we will update its finish
                 * time in a moment (the requeueing is then, more
                 * precisely, a repositioning in this case). To
                 * implement this repositioning, we: 1) dequeue the
                 * entity here, 2) update the finish time and requeue
                 * the entity according to the new timestamps below.
                 */
                if (entity->tree)
                        bfq_active_extract(st, entity);
        } else { /* The entity is already active, and not in service */
                /*
                 * In this case, this function gets called only if the
                 * next_in_service entity below this entity has
                 * changed, and this change has caused the budget of
                 * this entity to change, which, finally implies that
                 * the finish time of this entity must be
                 * updated. Such an update may cause the scheduling,
                 * i.e., the position in the active tree, of this
                 * entity to change. We handle this change by: 1)
                 * dequeueing the entity here, 2) updating the finish
                 * time and requeueing the entity according to the new
                 * timestamps below. This is the same approach as the
                 * non-extracted-entity sub-case above.
                 */
                bfq_active_extract(st, entity);
        }

        bfq_update_fin_time_enqueue(entity, st, false);
}
  962. static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
  963. struct bfq_sched_data *sd,
  964. bool non_blocking_wait_rq)
  965. {
  966. struct bfq_service_tree *st = bfq_entity_service_tree(entity);
  967. if (sd->in_service_entity == entity || entity->tree == &st->active)
  968. /*
  969. * in service or already queued on the active tree,
  970. * requeue or reposition
  971. */
  972. __bfq_requeue_entity(entity);
  973. else
  974. /*
  975. * Not in service and not queued on its active tree:
  976. * the activity is idle and this is a true activation.
  977. */
  978. __bfq_activate_entity(entity, non_blocking_wait_rq);
  979. }
  980. /**
  981. * bfq_activate_requeue_entity - activate or requeue an entity representing a
  982. * bfq_queue, and activate, requeue or reposition
  983. * all ancestors for which such an update becomes
  984. * necessary.
  985. * @entity: the entity to activate.
  986. * @non_blocking_wait_rq: true if this entity was waiting for a request
  987. * @requeue: true if this is a requeue, which implies that bfqq is
  988. * being expired; thus ALL its ancestors stop being served and must
  989. * therefore be requeued
  990. * @expiration: true if this function is being invoked in the expiration path
  991. * of the in-service queue
  992. */
  993. static void bfq_activate_requeue_entity(struct bfq_entity *entity,
  994. bool non_blocking_wait_rq,
  995. bool requeue, bool expiration)
  996. {
  997. struct bfq_sched_data *sd;
  998. for_each_entity(entity) {
  999. sd = entity->sched_data;
  1000. __bfq_activate_requeue_entity(entity, sd, non_blocking_wait_rq);
  1001. if (!bfq_update_next_in_service(sd, entity, expiration) &&
  1002. !requeue)
  1003. break;
  1004. }
  1005. }
/**
 * __bfq_deactivate_entity - deactivate an entity from its service tree.
 * @entity: the entity to deactivate.
 * @ins_into_idle_tree: if false, the entity will not be put into the
 *                      idle tree.
 *
 * Deactivates an entity, independently of its previous state. Must
 * be invoked only if entity is on a service tree. Extracts the entity
 * from that tree, and if necessary and allowed, puts it into the idle
 * tree.
 *
 * Return: true if the entity was actually deactivated, false if it
 * was not on any tree.
 */
bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
{
        struct bfq_sched_data *sd = entity->sched_data;
        struct bfq_service_tree *st;
        bool is_in_service;

        if (!entity->on_st) /* entity never activated, or already inactive */
                return false;

        /*
         * If we get here, then entity is active, which implies that
         * bfq_group_set_parent has already been invoked for the group
         * represented by entity. Therefore, the field
         * entity->sched_data has been set, and we can safely use it.
         */
        st = bfq_entity_service_tree(entity);
        is_in_service = entity == sd->in_service_entity;

        bfq_calc_finish(entity, entity->service);

        if (is_in_service)
                sd->in_service_entity = NULL;
        else
                /*
                 * Non in-service entity: nobody will take care of
                 * resetting its service counter on expiration. Do it
                 * now.
                 */
                entity->service = 0;

        if (entity->tree == &st->active)
                bfq_active_extract(st, entity);
        else if (!is_in_service && entity->tree == &st->idle)
                bfq_idle_extract(st, entity);

        /*
         * Park the entity on the idle tree only when allowed and when
         * its finish time is still ahead of the tree's virtual time;
         * otherwise forget it (and drop its reference).
         */
        if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime))
                bfq_forget_entity(st, entity, is_in_service);
        else
                bfq_idle_insert(st, entity);

        return true;
}
/**
 * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
 * @entity: the entity to deactivate.
 * @ins_into_idle_tree: true if the entity can be put into the idle tree
 * @expiration: true if this function is being invoked in the expiration path
 *              of the in-service queue
 */
static void bfq_deactivate_entity(struct bfq_entity *entity,
                                  bool ins_into_idle_tree,
                                  bool expiration)
{
        struct bfq_sched_data *sd;
        struct bfq_entity *parent = NULL;

        /* First loop: propagate the deactivation upwards as needed. */
        for_each_entity_safe(entity, parent) {
                sd = entity->sched_data;

                if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) {
                        /*
                         * entity is not in any tree any more, so
                         * this deactivation is a no-op, and there is
                         * nothing to change for upper-level entities
                         * (in case of expiration, this can never
                         * happen).
                         */
                        return;
                }

                if (sd->next_in_service == entity)
                        /*
                         * entity was the next_in_service entity,
                         * then, since entity has just been
                         * deactivated, a new one must be found.
                         */
                        bfq_update_next_in_service(sd, NULL, expiration);

                if (sd->next_in_service || sd->in_service_entity) {
                        /*
                         * The parent entity is still active, because
                         * either next_in_service or in_service_entity
                         * is not NULL. So, no further upwards
                         * deactivation must be performed. Yet,
                         * next_in_service has changed. Then the
                         * schedule does need to be updated upwards.
                         *
                         * NOTE If in_service_entity is not NULL, then
                         * next_in_service may happen to be NULL,
                         * although the parent entity is evidently
                         * active. This happens if 1) the entity
                         * pointed by in_service_entity is the only
                         * active entity in the parent entity, and 2)
                         * according to the definition of
                         * next_in_service, the in_service_entity
                         * cannot be considered as
                         * next_in_service. See the comments on the
                         * definition of next_in_service for details.
                         */
                        break;
                }

                /*
                 * If we get here, then the parent is no more
                 * backlogged and we need to propagate the
                 * deactivation upwards. Thus let the loop go on.
                 */

                /*
                 * Also let parent be queued into the idle tree on
                 * deactivation, to preserve service guarantees, and
                 * assuming that who invoked this function does not
                 * need parent entities too to be removed completely.
                 */
                ins_into_idle_tree = true;
        }

        /*
         * If the deactivation loop is fully executed, then there are
         * no more entities to touch and next loop is not executed at
         * all. Otherwise, requeue remaining entities if they are
         * about to stop receiving service, or reposition them if this
         * is not the case.
         */
        entity = parent;
        for_each_entity(entity) {
                /*
                 * Invoke __bfq_requeue_entity on entity, even if
                 * already active, to requeue/reposition it in the
                 * active tree (because sd->next_in_service has
                 * changed)
                 */
                __bfq_requeue_entity(entity);

                sd = entity->sched_data;
                if (!bfq_update_next_in_service(sd, entity, expiration) &&
                    !expiration)
                        /*
                         * next_in_service unchanged or not causing
                         * any change in entity->parent->sd, and no
                         * requeueing needed for expiration: stop
                         * here.
                         */
                        break;
        }
}
  1148. /**
  1149. * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
  1150. * if needed, to have at least one entity eligible.
  1151. * @st: the service tree to act upon.
  1152. *
  1153. * Assumes that st is not empty.
  1154. */
  1155. static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
  1156. {
  1157. struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);
  1158. if (bfq_gt(root_entity->min_start, st->vtime))
  1159. return root_entity->min_start;
  1160. return st->vtime;
  1161. }
  1162. static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
  1163. {
  1164. if (new_value > st->vtime) {
  1165. st->vtime = new_value;
  1166. bfq_forget_idle(st);
  1167. }
  1168. }
/**
 * bfq_first_active_entity - find the eligible entity with
 *                           the smallest finish time
 * @st: the service tree to select from.
 * @vtime: the system virtual to use as a reference for eligibility
 *
 * This function searches the first schedulable entity, starting from the
 * root of the tree and going on the left every time on this side there is
 * a subtree with at least one eligible (start <= vtime) entity. The path on
 * the right is followed only if a) the left subtree contains no eligible
 * entities and b) no eligible entity has been found yet.
 */
static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
                                                  u64 vtime)
{
        struct bfq_entity *entry, *first = NULL;
        struct rb_node *node = st->active.rb_node;

        while (node) {
                entry = rb_entry(node, struct bfq_entity, rb_node);
left:
                /* The tree is ordered by finish time; the leftmost
                 * eligible node found so far is the best candidate. */
                if (!bfq_gt(entry->start, vtime))
                        first = entry;

                if (node->rb_left) {
                        entry = rb_entry(node->rb_left,
                                         struct bfq_entity, rb_node);
                        /*
                         * min_start (augmented rbtree data) prunes left
                         * subtrees that contain no eligible entity.
                         */
                        if (!bfq_gt(entry->min_start, vtime)) {
                                node = node->rb_left;
                                goto left;
                        }
                }
                if (first)
                        break;
                node = node->rb_right;
        }

        return first;
}
  1205. /**
  1206. * __bfq_lookup_next_entity - return the first eligible entity in @st.
  1207. * @st: the service tree.
  1208. *
  1209. * If there is no in-service entity for the sched_data st belongs to,
  1210. * then return the entity that will be set in service if:
  1211. * 1) the parent entity this st belongs to is set in service;
  1212. * 2) no entity belonging to such parent entity undergoes a state change
  1213. * that would influence the timestamps of the entity (e.g., becomes idle,
  1214. * becomes backlogged, changes its budget, ...).
  1215. *
  1216. * In this first case, update the virtual time in @st too (see the
  1217. * comments on this update inside the function).
  1218. *
  1219. * In constrast, if there is an in-service entity, then return the
  1220. * entity that would be set in service if not only the above
  1221. * conditions, but also the next one held true: the currently
  1222. * in-service entity, on expiration,
  1223. * 1) gets a finish time equal to the current one, or
  1224. * 2) is not eligible any more, or
  1225. * 3) is idle.
  1226. */
  1227. static struct bfq_entity *
  1228. __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service)
  1229. {
  1230. struct bfq_entity *entity;
  1231. u64 new_vtime;
  1232. if (RB_EMPTY_ROOT(&st->active))
  1233. return NULL;
  1234. /*
  1235. * Get the value of the system virtual time for which at
  1236. * least one entity is eligible.
  1237. */
  1238. new_vtime = bfq_calc_vtime_jump(st);
  1239. /*
  1240. * If there is no in-service entity for the sched_data this
  1241. * active tree belongs to, then push the system virtual time
  1242. * up to the value that guarantees that at least one entity is
  1243. * eligible. If, instead, there is an in-service entity, then
  1244. * do not make any such update, because there is already an
  1245. * eligible entity, namely the in-service one (even if the
  1246. * entity is not on st, because it was extracted when set in
  1247. * service).
  1248. */
  1249. if (!in_service)
  1250. bfq_update_vtime(st, new_vtime);
  1251. entity = bfq_first_active_entity(st, new_vtime);
  1252. return entity;
  1253. }
  1254. /**
  1255. * bfq_lookup_next_entity - return the first eligible entity in @sd.
  1256. * @sd: the sched_data.
  1257. * @expiration: true if we are on the expiration path of the in-service queue
  1258. *
  1259. * This function is invoked when there has been a change in the trees
  1260. * for sd, and we need to know what is the new next entity to serve
  1261. * after this change.
  1262. */
  1263. static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
  1264. bool expiration)
  1265. {
  1266. struct bfq_service_tree *st = sd->service_tree;
  1267. struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
  1268. struct bfq_entity *entity = NULL;
  1269. int class_idx = 0;
  1270. /*
  1271. * Choose from idle class, if needed to guarantee a minimum
  1272. * bandwidth to this class (and if there is some active entity
  1273. * in idle class). This should also mitigate
  1274. * priority-inversion problems in case a low priority task is
  1275. * holding file system resources.
  1276. */
  1277. if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
  1278. BFQ_CL_IDLE_TIMEOUT)) {
  1279. if (!RB_EMPTY_ROOT(&idle_class_st->active))
  1280. class_idx = BFQ_IOPRIO_CLASSES - 1;
  1281. /* About to be served if backlogged, or not yet backlogged */
  1282. sd->bfq_class_idle_last_service = jiffies;
  1283. }
  1284. /*
  1285. * Find the next entity to serve for the highest-priority
  1286. * class, unless the idle class needs to be served.
  1287. */
  1288. for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
  1289. /*
  1290. * If expiration is true, then bfq_lookup_next_entity
  1291. * is being invoked as a part of the expiration path
  1292. * of the in-service queue. In this case, even if
  1293. * sd->in_service_entity is not NULL,
  1294. * sd->in_service_entiy at this point is actually not
  1295. * in service any more, and, if needed, has already
  1296. * been properly queued or requeued into the right
  1297. * tree. The reason why sd->in_service_entity is still
  1298. * not NULL here, even if expiration is true, is that
  1299. * sd->in_service_entiy is reset as a last step in the
  1300. * expiration path. So, if expiration is true, tell
  1301. * __bfq_lookup_next_entity that there is no
  1302. * sd->in_service_entity.
  1303. */
  1304. entity = __bfq_lookup_next_entity(st + class_idx,
  1305. sd->in_service_entity &&
  1306. !expiration);
  1307. if (entity)
  1308. break;
  1309. }
  1310. if (!entity)
  1311. return NULL;
  1312. return entity;
  1313. }
  1314. bool next_queue_may_preempt(struct bfq_data *bfqd)
  1315. {
  1316. struct bfq_sched_data *sd = &bfqd->root_group->sched_data;
  1317. return sd->next_in_service != sd->in_service_entity;
  1318. }
/*
 * Get next queue for service.
 */
struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
{
        struct bfq_entity *entity = NULL;
        struct bfq_sched_data *sd;
        struct bfq_queue *bfqq;

        if (bfqd->busy_queues == 0)
                return NULL;

        /*
         * Traverse the path from the root to the leaf entity to
         * serve. Set in service all the entities visited along the
         * way.
         */
        sd = &bfqd->root_group->sched_data;
        for (; sd ; sd = entity->my_sched_data) {
                /*
                 * WARNING. We are about to set the in-service entity
                 * to sd->next_in_service, i.e., to the (cached) value
                 * returned by bfq_lookup_next_entity(sd) the last
                 * time it was invoked, i.e., the last time when the
                 * service order in sd changed as a consequence of the
                 * activation or deactivation of an entity. In this
                 * respect, if we execute bfq_lookup_next_entity(sd)
                 * in this very moment, it may, although with low
                 * probability, yield a different entity than that
                 * pointed to by sd->next_in_service. This rare event
                 * happens in case there was no CLASS_IDLE entity to
                 * serve for sd when bfq_lookup_next_entity(sd) was
                 * invoked for the last time, while there is now one
                 * such entity.
                 *
                 * If the above event happens, then the scheduling of
                 * such entity in CLASS_IDLE is postponed until the
                 * service of the sd->next_in_service entity
                 * finishes. In fact, when the latter is expired,
                 * bfq_lookup_next_entity(sd) gets called again,
                 * exactly to update sd->next_in_service.
                 */

                /* Make next_in_service entity become in_service_entity */
                entity = sd->next_in_service;
                sd->in_service_entity = entity;

                /*
                 * Reset the accumulator of the amount of service that
                 * the entity is about to receive.
                 */
                entity->service = 0;

                /*
                 * If entity is no longer a candidate for next
                 * service, then it must be extracted from its active
                 * tree, so as to make sure that it won't be
                 * considered when computing next_in_service. See the
                 * comments on the function
                 * bfq_no_longer_next_in_service() for details.
                 */
                if (bfq_no_longer_next_in_service(entity))
                        bfq_active_extract(bfq_entity_service_tree(entity),
                                           entity);

                /*
                 * Even if entity is not to be extracted according to
                 * the above check, a descendant entity may get
                 * extracted in one of the next iterations of this
                 * loop. Such an event could cause a change in
                 * next_in_service for the level of the descendant
                 * entity, and thus possibly back to this level.
                 *
                 * However, we cannot perform the resulting needed
                 * update of next_in_service for this level before the
                 * end of the whole loop, because, to know which is
                 * the correct next-to-serve candidate entity for each
                 * level, we need first to find the leaf entity to set
                 * in service. In fact, only after we know which is
                 * the next-to-serve leaf entity, we can discover
                 * whether the parent entity of the leaf entity
                 * becomes the next-to-serve, and so on.
                 */
        }

        /* The walk above ends on a leaf entity, i.e., a queue. */
        bfqq = bfq_entity_to_bfqq(entity);

        /*
         * We can finally update all next-to-serve entities along the
         * path from the leaf entity just set in service to the root.
         */
        for_each_entity(entity) {
                struct bfq_sched_data *sd = entity->sched_data;

                if (!bfq_update_next_in_service(sd, NULL, false))
                        break;
        }

        return bfqq;
}
  1409. void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
  1410. {
  1411. struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
  1412. struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
  1413. struct bfq_entity *entity = in_serv_entity;
  1414. bfq_clear_bfqq_wait_request(in_serv_bfqq);
  1415. hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
  1416. bfqd->in_service_queue = NULL;
  1417. /*
  1418. * When this function is called, all in-service entities have
  1419. * been properly deactivated or requeued, so we can safely
  1420. * execute the final step: reset in_service_entity along the
  1421. * path from entity to the root.
  1422. */
  1423. for_each_entity(entity)
  1424. entity->sched_data->in_service_entity = NULL;
  1425. /*
  1426. * in_serv_entity is no longer in service, so, if it is in no
  1427. * service tree either, then release the service reference to
  1428. * the queue it represents (taken with bfq_get_entity).
  1429. */
  1430. if (!in_serv_entity->on_st)
  1431. bfq_put_queue(in_serv_bfqq);
  1432. }
  1433. void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  1434. bool ins_into_idle_tree, bool expiration)
  1435. {
  1436. struct bfq_entity *entity = &bfqq->entity;
  1437. bfq_deactivate_entity(entity, ins_into_idle_tree, expiration);
  1438. }
  1439. void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
  1440. {
  1441. struct bfq_entity *entity = &bfqq->entity;
  1442. bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
  1443. false, false);
  1444. bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
  1445. }
  1446. void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  1447. bool expiration)
  1448. {
  1449. struct bfq_entity *entity = &bfqq->entity;
  1450. bfq_activate_requeue_entity(entity, false,
  1451. bfqq == bfqd->in_service_queue, expiration);
  1452. }
  1453. /*
  1454. * Called when the bfqq no longer has requests pending, remove it from
  1455. * the service tree. As a special case, it can be invoked during an
  1456. * expiration.
  1457. */
  1458. void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  1459. bool expiration)
  1460. {
  1461. bfq_log_bfqq(bfqd, bfqq, "del from busy");
  1462. bfq_clear_bfqq_busy(bfqq);
  1463. bfqd->busy_queues--;
  1464. if (!bfqq->dispatched)
  1465. bfq_weights_tree_remove(bfqd, &bfqq->entity,
  1466. &bfqd->queue_weights_tree);
  1467. if (bfqq->wr_coeff > 1)
  1468. bfqd->wr_busy_queues--;
  1469. bfqg_stats_update_dequeue(bfqq_group(bfqq));
  1470. bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);
  1471. }
  1472. /*
  1473. * Called when an inactive queue receives a new request.
  1474. */
  1475. void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
  1476. {
  1477. bfq_log_bfqq(bfqd, bfqq, "add to busy");
  1478. bfq_activate_bfqq(bfqd, bfqq);
  1479. bfq_mark_bfqq_busy(bfqq);
  1480. bfqd->busy_queues++;
  1481. if (!bfqq->dispatched)
  1482. if (bfqq->wr_coeff == 1)
  1483. bfq_weights_tree_add(bfqd, &bfqq->entity,
  1484. &bfqd->queue_weights_tree);
  1485. if (bfqq->wr_coeff > 1)
  1486. bfqd->wr_busy_queues++;
  1487. }