bfq-cgroup.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  /*
   * BFQ: CGROUPS support.
   *
   * Based on ideas and code from CFQ:
   * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
   *
   * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
   *                    Paolo Valente <paolo.valente@unimore.it>
   *
   * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
   *
   * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
   * file.
   */
  15. #ifdef CONFIG_CGROUP_BFQIO
  16. static struct bfqio_cgroup bfqio_root_cgroup = {
  17. .weight = BFQ_DEFAULT_GRP_WEIGHT,
  18. .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
  19. .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
  20. };
  21. static inline void bfq_init_entity(struct bfq_entity *entity,
  22. struct bfq_group *bfqg)
  23. {
  24. entity->weight = entity->new_weight;
  25. entity->orig_weight = entity->new_weight;
  26. entity->ioprio = entity->new_ioprio;
  27. entity->ioprio_class = entity->new_ioprio_class;
  28. entity->parent = bfqg->my_entity;
  29. entity->sched_data = &bfqg->sched_data;
  30. }
  31. static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup)
  32. {
  33. return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id),
  34. struct bfqio_cgroup, css);
  35. }
  36. /*
  37. * Search the bfq_group for bfqd into the hash table (by now only a list)
  38. * of bgrp. Must be called under rcu_read_lock().
  39. */
  40. static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
  41. struct bfq_data *bfqd)
  42. {
  43. struct bfq_group *bfqg;
  44. struct hlist_node *n;
  45. void *key;
  46. hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) {
  47. key = rcu_dereference(bfqg->bfqd);
  48. if (key == bfqd)
  49. return bfqg;
  50. }
  51. return NULL;
  52. }
  53. static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
  54. struct bfq_group *bfqg)
  55. {
  56. struct bfq_entity *entity = &bfqg->entity;
  57. /*
  58. * If the weight of the entity has never been set via the sysfs
  59. * interface, then bgrp->weight == 0. In this case we initialize
  60. * the weight from the current ioprio value. Otherwise, the group
  61. * weight, if set, has priority over the ioprio value.
  62. */
  63. if (bgrp->weight == 0) {
  64. entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
  65. entity->new_ioprio = bgrp->ioprio;
  66. } else {
  67. if (bgrp->weight < BFQ_MIN_WEIGHT ||
  68. bgrp->weight > BFQ_MAX_WEIGHT) {
  69. printk(KERN_CRIT "bfq_group_init_entity: "
  70. "bgrp->weight %d\n", bgrp->weight);
  71. BUG();
  72. }
  73. entity->new_weight = bgrp->weight;
  74. entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
  75. }
  76. entity->orig_weight = entity->weight = entity->new_weight;
  77. entity->ioprio = entity->new_ioprio;
  78. entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
  79. entity->my_sched_data = &bfqg->sched_data;
  80. bfqg->active_entities = 0;
  81. }
  82. static inline void bfq_group_set_parent(struct bfq_group *bfqg,
  83. struct bfq_group *parent)
  84. {
  85. struct bfq_entity *entity;
  86. BUG_ON(parent == NULL);
  87. BUG_ON(bfqg == NULL);
  88. entity = &bfqg->entity;
  89. entity->parent = parent->my_entity;
  90. entity->sched_data = &parent->sched_data;
  91. }
  92. /**
  93. * bfq_group_chain_alloc - allocate a chain of groups.
  94. * @bfqd: queue descriptor.
  95. * @cgroup: the leaf cgroup this chain starts from.
  96. *
  97. * Allocate a chain of groups starting from the one belonging to
  98. * @cgroup up to the root cgroup. Stop if a cgroup on the chain
  99. * to the root has already an allocated group on @bfqd.
  100. */
  101. static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
  102. struct cgroup *cgroup)
  103. {
  104. struct bfqio_cgroup *bgrp;
  105. struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
  106. for (; cgroup != NULL; cgroup = cgroup->parent) {
  107. bgrp = cgroup_to_bfqio(cgroup);
  108. bfqg = bfqio_lookup_group(bgrp, bfqd);
  109. if (bfqg != NULL) {
  110. /*
  111. * All the cgroups in the path from there to the
  112. * root must have a bfq_group for bfqd, so we don't
  113. * need any more allocations.
  114. */
  115. break;
  116. }
  117. bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
  118. if (bfqg == NULL)
  119. goto cleanup;
  120. bfq_group_init_entity(bgrp, bfqg);
  121. bfqg->my_entity = &bfqg->entity;
  122. if (leaf == NULL) {
  123. leaf = bfqg;
  124. prev = leaf;
  125. } else {
  126. bfq_group_set_parent(prev, bfqg);
  127. /*
  128. * Build a list of allocated nodes using the bfqd
  129. * filed, that is still unused and will be
  130. * initialized only after the node will be
  131. * connected.
  132. */
  133. prev->bfqd = bfqg;
  134. prev = bfqg;
  135. }
  136. }
  137. return leaf;
  138. cleanup:
  139. while (leaf != NULL) {
  140. prev = leaf;
  141. leaf = leaf->bfqd;
  142. kfree(prev);
  143. }
  144. return NULL;
  145. }
  146. /**
  147. * bfq_group_chain_link - link an allocated group chain to a cgroup
  148. * hierarchy.
  149. * @bfqd: the queue descriptor.
  150. * @cgroup: the leaf cgroup to start from.
  151. * @leaf: the leaf group (to be associated to @cgroup).
  152. *
  153. * Try to link a chain of groups to a cgroup hierarchy, connecting the
  154. * nodes bottom-up, so we can be sure that when we find a cgroup in the
  155. * hierarchy that already as a group associated to @bfqd all the nodes
  156. * in the path to the root cgroup have one too.
  157. *
  158. * On locking: the queue lock protects the hierarchy (there is a hierarchy
  159. * per device) while the bfqio_cgroup lock protects the list of groups
  160. * belonging to the same cgroup.
  161. */
  162. static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup,
  163. struct bfq_group *leaf)
  164. {
  165. struct bfqio_cgroup *bgrp;
  166. struct bfq_group *bfqg, *next, *prev = NULL;
  167. unsigned long flags;
  168. assert_spin_locked(bfqd->queue->queue_lock);
  169. for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) {
  170. bgrp = cgroup_to_bfqio(cgroup);
  171. next = leaf->bfqd;
  172. bfqg = bfqio_lookup_group(bgrp, bfqd);
  173. BUG_ON(bfqg != NULL);
  174. spin_lock_irqsave(&bgrp->lock, flags);
  175. rcu_assign_pointer(leaf->bfqd, bfqd);
  176. hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
  177. hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
  178. spin_unlock_irqrestore(&bgrp->lock, flags);
  179. prev = leaf;
  180. leaf = next;
  181. }
  182. BUG_ON(cgroup == NULL && leaf != NULL);
  183. if (cgroup != NULL && prev != NULL) {
  184. bgrp = cgroup_to_bfqio(cgroup);
  185. bfqg = bfqio_lookup_group(bgrp, bfqd);
  186. bfq_group_set_parent(prev, bfqg);
  187. }
  188. }
  189. /**
  190. * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
  191. * @bfqd: queue descriptor.
  192. * @cgroup: cgroup being searched for.
  193. *
  194. * Return a group associated to @bfqd in @cgroup, allocating one if
  195. * necessary. When a group is returned all the cgroups in the path
  196. * to the root have a group associated to @bfqd.
  197. *
  198. * If the allocation fails, return the root group: this breaks guarantees
  199. * but is a safe fallback. If this loss becomes a problem it can be
  200. * mitigated using the equivalent weight (given by the product of the
  201. * weights of the groups in the path from @group to the root) in the
  202. * root scheduler.
  203. *
  204. * We allocate all the missing nodes in the path from the leaf cgroup
  205. * to the root and we connect the nodes only after all the allocations
  206. * have been successful.
  207. */
  208. static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
  209. struct cgroup *cgroup)
  210. {
  211. struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
  212. struct bfq_group *bfqg;
  213. bfqg = bfqio_lookup_group(bgrp, bfqd);
  214. if (bfqg != NULL)
  215. return bfqg;
  216. bfqg = bfq_group_chain_alloc(bfqd, cgroup);
  217. if (bfqg != NULL)
  218. bfq_group_chain_link(bfqd, cgroup, bfqg);
  219. else
  220. bfqg = bfqd->root_group;
  221. return bfqg;
  222. }
  223. /**
  224. * bfq_bfqq_move - migrate @bfqq to @bfqg.
  225. * @bfqd: queue descriptor.
  226. * @bfqq: the queue to move.
  227. * @entity: @bfqq's entity.
  228. * @bfqg: the group to move to.
  229. *
  230. * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
  231. * it on the new one. Avoid putting the entity on the old group idle tree.
  232. *
  233. * Must be called under the queue lock; the cgroup owning @bfqg must
  234. * not disappear (by now this just means that we are called under
  235. * rcu_read_lock()).
  236. */
  237. static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  238. struct bfq_entity *entity, struct bfq_group *bfqg)
  239. {
  240. int busy, resume;
  241. busy = bfq_bfqq_busy(bfqq);
  242. resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
  243. BUG_ON(resume && !entity->on_st);
  244. BUG_ON(busy && !resume && entity->on_st &&
  245. bfqq != bfqd->in_service_queue);
  246. if (busy) {
  247. BUG_ON(atomic_read(&bfqq->ref) < 2);
  248. if (!resume)
  249. bfq_del_bfqq_busy(bfqd, bfqq, 0);
  250. else
  251. bfq_deactivate_bfqq(bfqd, bfqq, 0);
  252. } else if (entity->on_st)
  253. bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
  254. /*
  255. * Here we use a reference to bfqg. We don't need a refcounter
  256. * as the cgroup reference will not be dropped, so that its
  257. * destroy() callback will not be invoked.
  258. */
  259. entity->parent = bfqg->my_entity;
  260. entity->sched_data = &bfqg->sched_data;
  261. if (busy && resume)
  262. bfq_activate_bfqq(bfqd, bfqq);
  263. if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
  264. bfq_schedule_dispatch(bfqd);
  265. }
  266. /**
  267. * __bfq_bic_change_cgroup - move @bic to @cgroup.
  268. * @bfqd: the queue descriptor.
  269. * @bic: the bic to move.
  270. * @cgroup: the cgroup to move to.
  271. *
  272. * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
  273. * has to make sure that the reference to cgroup is valid across the call.
  274. *
  275. * NOTE: an alternative approach might have been to store the current
  276. * cgroup in bfqq and getting a reference to it, reducing the lookup
  277. * time here, at the price of slightly more complex code.
  278. */
  279. static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
  280. struct bfq_io_cq *bic,
  281. struct cgroup *cgroup)
  282. {
  283. struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
  284. struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
  285. struct bfq_entity *entity;
  286. struct bfq_group *bfqg;
  287. struct bfqio_cgroup *bgrp;
  288. bgrp = cgroup_to_bfqio(cgroup);
  289. bfqg = bfq_find_alloc_group(bfqd, cgroup);
  290. if (async_bfqq != NULL) {
  291. entity = &async_bfqq->entity;
  292. if (entity->sched_data != &bfqg->sched_data) {
  293. bic_set_bfqq(bic, NULL, 0);
  294. bfq_log_bfqq(bfqd, async_bfqq,
  295. "bic_change_group: %p %d",
  296. async_bfqq, atomic_read(&async_bfqq->ref));
  297. bfq_put_queue(async_bfqq);
  298. }
  299. }
  300. if (sync_bfqq != NULL) {
  301. entity = &sync_bfqq->entity;
  302. if (entity->sched_data != &bfqg->sched_data)
  303. bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
  304. }
  305. return bfqg;
  306. }
  307. /**
  308. * bfq_bic_change_cgroup - move @bic to @cgroup.
  309. * @bic: the bic being migrated.
  310. * @cgroup: the destination cgroup.
  311. *
  312. * When the task owning @bic is moved to @cgroup, @bic is immediately
  313. * moved into its new parent group.
  314. */
  315. static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
  316. struct cgroup *cgroup)
  317. {
  318. struct bfq_data *bfqd;
  319. unsigned long uninitialized_var(flags);
  320. bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
  321. &flags);
  322. if (bfqd != NULL) {
  323. __bfq_bic_change_cgroup(bfqd, bic, cgroup);
  324. bfq_put_bfqd_unlock(bfqd, &flags);
  325. }
  326. }
  327. /**
  328. * bfq_bic_update_cgroup - update the cgroup of @bic.
  329. * @bic: the @bic to update.
  330. *
  331. * Make sure that @bic is enqueued in the cgroup of the current task.
  332. * We need this in addition to moving bics during the cgroup attach
  333. * phase because the task owning @bic could be at its first disk
  334. * access or we may end up in the root cgroup as the result of a
  335. * memory allocation failure and here we try to move to the right
  336. * group.
  337. *
  338. * Must be called under the queue lock. It is safe to use the returned
  339. * value even after the rcu_read_unlock() as the migration/destruction
  340. * paths act under the queue lock too. IOW it is impossible to race with
  341. * group migration/destruction and end up with an invalid group as:
  342. * a) here cgroup has not yet been destroyed, nor its destroy callback
  343. * has started execution, as current holds a reference to it,
  344. * b) if it is destroyed after rcu_read_unlock() [after current is
  345. * migrated to a different cgroup] its attach() callback will have
  346. * taken care of remove all the references to the old cgroup data.
  347. */
  348. static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
  349. {
  350. struct bfq_data *bfqd = bic_to_bfqd(bic);
  351. struct bfq_group *bfqg;
  352. struct cgroup *cgroup;
  353. BUG_ON(bfqd == NULL);
  354. rcu_read_lock();
  355. cgroup = task_cgroup(current, bfqio_subsys_id);
  356. bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup);
  357. rcu_read_unlock();
  358. return bfqg;
  359. }
  360. /**
  361. * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
  362. * @st: the service tree being flushed.
  363. */
  364. static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
  365. {
  366. struct bfq_entity *entity = st->first_idle;
  367. for (; entity != NULL; entity = st->first_idle)
  368. __bfq_deactivate_entity(entity, 0);
  369. }
  370. /**
  371. * bfq_reparent_leaf_entity - move leaf entity to the root_group.
  372. * @bfqd: the device data structure with the root group.
  373. * @entity: the entity to move.
  374. */
  375. static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
  376. struct bfq_entity *entity)
  377. {
  378. struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
  379. BUG_ON(bfqq == NULL);
  380. bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
  381. return;
  382. }
  383. /**
  384. * bfq_reparent_active_entities - move to the root group all active
  385. * entities.
  386. * @bfqd: the device data structure with the root group.
  387. * @bfqg: the group to move from.
  388. * @st: the service tree with the entities.
  389. *
  390. * Needs queue_lock to be taken and reference to be valid over the call.
  391. */
  392. static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
  393. struct bfq_group *bfqg,
  394. struct bfq_service_tree *st)
  395. {
  396. struct rb_root *active = &st->active;
  397. struct bfq_entity *entity = NULL;
  398. if (!RB_EMPTY_ROOT(&st->active))
  399. entity = bfq_entity_of(rb_first(active));
  400. for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
  401. bfq_reparent_leaf_entity(bfqd, entity);
  402. if (bfqg->sched_data.in_service_entity != NULL)
  403. bfq_reparent_leaf_entity(bfqd,
  404. bfqg->sched_data.in_service_entity);
  405. return;
  406. }
  407. /**
  408. * bfq_destroy_group - destroy @bfqg.
  409. * @bgrp: the bfqio_cgroup containing @bfqg.
  410. * @bfqg: the group being destroyed.
  411. *
  412. * Destroy @bfqg, making sure that it is not referenced from its parent.
  413. */
  414. static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
  415. {
  416. struct bfq_data *bfqd;
  417. struct bfq_service_tree *st;
  418. struct bfq_entity *entity = bfqg->my_entity;
  419. unsigned long uninitialized_var(flags);
  420. int i;
  421. hlist_del(&bfqg->group_node);
  422. /*
  423. * Empty all service_trees belonging to this group before
  424. * deactivating the group itself.
  425. */
  426. for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
  427. st = bfqg->sched_data.service_tree + i;
  428. /*
  429. * The idle tree may still contain bfq_queues belonging
  430. * to exited task because they never migrated to a different
  431. * cgroup from the one being destroyed now. No one else
  432. * can access them so it's safe to act without any lock.
  433. */
  434. bfq_flush_idle_tree(st);
  435. /*
  436. * It may happen that some queues are still active
  437. * (busy) upon group destruction (if the corresponding
  438. * processes have been forced to terminate). We move
  439. * all the leaf entities corresponding to these queues
  440. * to the root_group.
  441. * Also, it may happen that the group has an entity
  442. * in service, which is disconnected from the active
  443. * tree: it must be moved, too.
  444. * There is no need to put the sync queues, as the
  445. * scheduler has taken no reference.
  446. */
  447. bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
  448. if (bfqd != NULL) {
  449. bfq_reparent_active_entities(bfqd, bfqg, st);
  450. bfq_put_bfqd_unlock(bfqd, &flags);
  451. }
  452. BUG_ON(!RB_EMPTY_ROOT(&st->active));
  453. BUG_ON(!RB_EMPTY_ROOT(&st->idle));
  454. }
  455. BUG_ON(bfqg->sched_data.next_in_service != NULL);
  456. BUG_ON(bfqg->sched_data.in_service_entity != NULL);
  457. /*
  458. * We may race with device destruction, take extra care when
  459. * dereferencing bfqg->bfqd.
  460. */
  461. bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
  462. if (bfqd != NULL) {
  463. hlist_del(&bfqg->bfqd_node);
  464. __bfq_deactivate_entity(entity, 0);
  465. bfq_put_async_queues(bfqd, bfqg);
  466. bfq_put_bfqd_unlock(bfqd, &flags);
  467. }
  468. BUG_ON(entity->tree != NULL);
  469. /*
  470. * No need to defer the kfree() to the end of the RCU grace
  471. * period: we are called from the destroy() callback of our
  472. * cgroup, so we can be sure that no one is a) still using
  473. * this cgroup or b) doing lookups in it.
  474. */
  475. kfree(bfqg);
  476. }
  477. static void bfq_end_wr_async(struct bfq_data *bfqd)
  478. {
  479. struct hlist_node *pos, *n;
  480. struct bfq_group *bfqg;
  481. hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node)
  482. bfq_end_wr_async_queues(bfqd, bfqg);
  483. bfq_end_wr_async_queues(bfqd, bfqd->root_group);
  484. }
  485. /**
  486. * bfq_disconnect_groups - disconnect @bfqd from all its groups.
  487. * @bfqd: the device descriptor being exited.
  488. *
  489. * When the device exits we just make sure that no lookup can return
  490. * the now unused group structures. They will be deallocated on cgroup
  491. * destruction.
  492. */
  493. static void bfq_disconnect_groups(struct bfq_data *bfqd)
  494. {
  495. struct hlist_node *pos, *n;
  496. struct bfq_group *bfqg;
  497. bfq_log(bfqd, "disconnect_groups beginning");
  498. hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) {
  499. hlist_del(&bfqg->bfqd_node);
  500. __bfq_deactivate_entity(bfqg->my_entity, 0);
  501. /*
  502. * Don't remove from the group hash, just set an
  503. * invalid key. No lookups can race with the
  504. * assignment as bfqd is being destroyed; this
  505. * implies also that new elements cannot be added
  506. * to the list.
  507. */
  508. rcu_assign_pointer(bfqg->bfqd, NULL);
  509. bfq_log(bfqd, "disconnect_groups: put async for group %p",
  510. bfqg);
  511. bfq_put_async_queues(bfqd, bfqg);
  512. }
  513. }
  514. static inline void bfq_free_root_group(struct bfq_data *bfqd)
  515. {
  516. struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
  517. struct bfq_group *bfqg = bfqd->root_group;
  518. bfq_put_async_queues(bfqd, bfqg);
  519. spin_lock_irq(&bgrp->lock);
  520. hlist_del_rcu(&bfqg->group_node);
  521. spin_unlock_irq(&bgrp->lock);
  522. /*
  523. * No need to synchronize_rcu() here: since the device is gone
  524. * there cannot be any read-side access to its root_group.
  525. */
  526. kfree(bfqg);
  527. }
  528. static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
  529. {
  530. struct bfq_group *bfqg;
  531. struct bfqio_cgroup *bgrp;
  532. int i;
  533. bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
  534. if (bfqg == NULL)
  535. return NULL;
  536. bfqg->entity.parent = NULL;
  537. for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
  538. bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
  539. bgrp = &bfqio_root_cgroup;
  540. spin_lock_irq(&bgrp->lock);
  541. rcu_assign_pointer(bfqg->bfqd, bfqd);
  542. hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
  543. spin_unlock_irq(&bgrp->lock);
  544. return bfqg;
  545. }
  546. #define SHOW_FUNCTION(__VAR) \
  547. static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \
  548. struct cftype *cftype) \
  549. { \
  550. struct bfqio_cgroup *bgrp; \
  551. u64 ret; \
  552. \
  553. if (!cgroup_lock_live_group(cgroup)) \
  554. return -ENODEV; \
  555. \
  556. bgrp = cgroup_to_bfqio(cgroup); \
  557. spin_lock_irq(&bgrp->lock); \
  558. ret = bgrp->__VAR; \
  559. spin_unlock_irq(&bgrp->lock); \
  560. \
  561. cgroup_unlock(); \
  562. \
  563. return ret; \
  564. }
  565. SHOW_FUNCTION(weight);
  566. SHOW_FUNCTION(ioprio);
  567. SHOW_FUNCTION(ioprio_class);
  568. #undef SHOW_FUNCTION
  569. #define STORE_FUNCTION(__VAR, __MIN, __MAX) \
  570. static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \
  571. struct cftype *cftype, \
  572. u64 val) \
  573. { \
  574. struct bfqio_cgroup *bgrp; \
  575. struct bfq_group *bfqg; \
  576. struct hlist_node *n; \
  577. \
  578. if (val < (__MIN) || val > (__MAX)) \
  579. return -EINVAL; \
  580. \
  581. if (!cgroup_lock_live_group(cgroup)) \
  582. return -ENODEV; \
  583. \
  584. bgrp = cgroup_to_bfqio(cgroup); \
  585. \
  586. spin_lock_irq(&bgrp->lock); \
  587. bgrp->__VAR = (unsigned short)val; \
  588. hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \
  589. /* \
  590. * Setting the ioprio_changed flag of the entity \
  591. * to 1 with new_##__VAR == ##__VAR would re-set \
  592. * the value of the weight to its ioprio mapping. \
  593. * Set the flag only if necessary. \
  594. */ \
  595. if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
  596. bfqg->entity.new_##__VAR = (unsigned short)val; \
  597. /* \
  598. * Make sure that the above new value has been \
  599. * stored in bfqg->entity.new_##__VAR before \
  600. * setting the ioprio_changed flag. In fact, \
  601. * this flag may be read asynchronously (in \
  602. * critical sections protected by a different \
  603. * lock than that held here), and finding this \
  604. * flag set may cause the execution of the code \
  605. * for updating parameters whose value may \
  606. * depend also on bfqg->entity.new_##__VAR (in \
  607. * __bfq_entity_update_weight_prio). \
  608. * This barrier makes sure that the new value \
  609. * of bfqg->entity.new_##__VAR is correctly \
  610. * seen in that code. \
  611. */ \
  612. smp_wmb(); \
  613. bfqg->entity.ioprio_changed = 1; \
  614. } \
  615. } \
  616. spin_unlock_irq(&bgrp->lock); \
  617. \
  618. cgroup_unlock(); \
  619. \
  620. return 0; \
  621. }
  622. STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
  623. STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
  624. STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
  625. #undef STORE_FUNCTION
  626. static struct cftype bfqio_files[] = {
  627. {
  628. .name = "weight",
  629. .read_u64 = bfqio_cgroup_weight_read,
  630. .write_u64 = bfqio_cgroup_weight_write,
  631. },
  632. {
  633. .name = "ioprio",
  634. .read_u64 = bfqio_cgroup_ioprio_read,
  635. .write_u64 = bfqio_cgroup_ioprio_write,
  636. },
  637. {
  638. .name = "ioprio_class",
  639. .read_u64 = bfqio_cgroup_ioprio_class_read,
  640. .write_u64 = bfqio_cgroup_ioprio_class_write,
  641. },
  642. };
  643. static struct cgroup_subsys_state *bfqio_create(struct cgroup *cgroup)
  644. {
  645. struct bfqio_cgroup *bgrp;
  646. if (cgroup->parent != NULL) {
  647. bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
  648. if (bgrp == NULL)
  649. return ERR_PTR(-ENOMEM);
  650. } else
  651. bgrp = &bfqio_root_cgroup;
  652. spin_lock_init(&bgrp->lock);
  653. INIT_HLIST_HEAD(&bgrp->group_data);
  654. bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
  655. bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
  656. return &bgrp->css;
  657. }
  658. /*
  659. * We cannot support shared io contexts, as we have no means to support
  660. * two tasks with the same ioc in two different groups without major rework
  661. * of the main bic/bfqq data structures. By now we allow a task to change
  662. * its cgroup only if it's the only owner of its ioc; the drawback of this
  663. * behavior is that a group containing a task that forked using CLONE_IO
  664. * will not be destroyed until the tasks sharing the ioc die.
  665. */
  666. static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
  667. {
  668. struct task_struct *task;
  669. struct io_context *ioc;
  670. int ret = 0;
  671. cgroup_taskset_for_each(task, cgroup, tset) {
  672. /* task_lock() is needed to avoid races with exit_io_context() */
  673. task_lock(task);
  674. ioc = task->io_context;
  675. if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
  676. /*
  677. * ioc == NULL means that the task is either too
  678. * young or exiting: if it has still no ioc the
  679. * ioc can't be shared, if the task is exiting the
  680. * attach will fail anyway, no matter what we
  681. * return here.
  682. */
  683. ret = -EINVAL;
  684. task_unlock(task);
  685. if (ret)
  686. break;
  687. }
  688. return ret;
  689. }
  690. static void bfqio_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
  691. {
  692. struct task_struct *task;
  693. struct io_context *ioc;
  694. struct io_cq *icq;
  695. struct hlist_node *n;
  696. /*
  697. * IMPORTANT NOTE: The move of more than one process at a time to a
  698. * new group has not yet been tested.
  699. */
  700. cgroup_taskset_for_each(task, cgroup, tset) {
  701. ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
  702. if (ioc) {
  703. /*
  704. * Handle cgroup change here.
  705. */
  706. rcu_read_lock();
  707. hlist_for_each_entry_rcu(icq, n, &ioc->icq_list, ioc_node)
  708. if (!strncmp(
  709. icq->q->elevator->type->elevator_name,
  710. "bfq", ELV_NAME_MAX))
  711. bfq_bic_change_cgroup(icq_to_bic(icq),
  712. cgroup);
  713. rcu_read_unlock();
  714. put_io_context(ioc);
  715. }
  716. }
  717. }
  718. static void bfqio_destroy(struct cgroup *cgroup)
  719. {
  720. struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
  721. struct hlist_node *n, *tmp;
  722. struct bfq_group *bfqg;
  723. /*
  724. * Since we are destroying the cgroup, there are no more tasks
  725. * referencing it, and all the RCU grace periods that may have
  726. * referenced it are ended (as the destruction of the parent
  727. * cgroup is RCU-safe); bgrp->group_data will not be accessed by
  728. * anything else and we don't need any synchronization.
  729. */
  730. hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node)
  731. bfq_destroy_group(bgrp, bfqg);
  732. BUG_ON(!hlist_empty(&bgrp->group_data));
  733. kfree(bgrp);
  734. }
  735. struct cgroup_subsys bfqio_subsys = {
  736. .name = "bfqio",
  737. .create = bfqio_create,
  738. .can_attach = bfqio_can_attach,
  739. .attach = bfqio_attach,
  740. .destroy = bfqio_destroy,
  741. .subsys_id = bfqio_subsys_id,
  742. };
  743. #else
  744. static inline void bfq_init_entity(struct bfq_entity *entity,
  745. struct bfq_group *bfqg)
  746. {
  747. entity->weight = entity->new_weight;
  748. entity->orig_weight = entity->new_weight;
  749. entity->ioprio = entity->new_ioprio;
  750. entity->ioprio_class = entity->new_ioprio_class;
  751. entity->sched_data = &bfqg->sched_data;
  752. }
  753. static inline struct bfq_group *
  754. bfq_bic_update_cgroup(struct bfq_io_cq *bic)
  755. {
  756. struct bfq_data *bfqd = bic_to_bfqd(bic);
  757. return bfqd->root_group;
  758. }
  /* Variant used when cgroup support is compiled out: intentionally a no-op. */
  static inline void bfq_bfqq_move(struct bfq_data *bfqd,
                                   struct bfq_queue *bfqq,
                                   struct bfq_entity *entity,
                                   struct bfq_group *bfqg)
  {
      /* Nothing to do. */
  }
  765. static void bfq_end_wr_async(struct bfq_data *bfqd)
  766. {
  767. bfq_end_wr_async_queues(bfqd, bfqd->root_group);
  768. }
  769. static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
  770. {
  771. bfq_put_async_queues(bfqd, bfqd->root_group);
  772. }
  773. static inline void bfq_free_root_group(struct bfq_data *bfqd)
  774. {
  775. kfree(bfqd->root_group);
  776. }
  777. static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
  778. {
  779. struct bfq_group *bfqg;
  780. int i;
  781. bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
  782. if (bfqg == NULL)
  783. return NULL;
  784. for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
  785. bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
  786. return bfqg;
  787. }
  788. #endif