/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/cpu_rmap.h>
#ifdef CONFIG_GENERIC_HARDIRQS
#include <linux/interrupt.h>
#endif
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities.  This can be seen as a reverse-map of
 * CPU affinity.  However, we do not assume that the object affinities
 * cover all CPUs in the system.  For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */
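
/*
 * Each entry in ->near[] records the index of the CPU's current
 * nearest object together with a rough topological distance: 0 for a
 * CPU in an object's own affinity mask, 1 for an SMT sibling of such
 * a CPU, 2 for a CPU in the same package, 3 for a CPU in the same
 * NUMA node, and CPU_RMAP_DIST_INF when nothing better is known yet
 * (see cpu_rmap_update() below).
 */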

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
	struct cpu_rmap *rmap;
	unsigned int cpu;
	size_t obj_offset;

	/* This is a silly number of objects, and we use u16 indices. */
	if (size > 0xffff)
		return NULL;

	/* Offset of object pointer array from base structure */
	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
			   sizeof(void *));

	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
	if (!rmap)
		return NULL;

	kref_init(&rmap->refcount);
	rmap->obj = (void **)((char *)rmap + obj_offset);

	/* Initially assign CPUs to objects on a rota, since we have
	 * no idea where the objects are.  Use infinite distance, so
	 * any object with known distance is preferable.  Include the
	 * CPUs that are not present/online, since we definitely want
	 * any newly-hotplugged CPUs to have some object assigned.
	 */
	for_each_possible_cpu(cpu) {
		rmap->near[cpu].index = cpu % size;
		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
	}

	rmap->size = size;
	return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);
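
/* Illustrative sketch of typical usage; "n_queues" is a hypothetical
 * variable standing in for a driver's object count:
 *
 *	struct cpu_rmap *rmap;
 *
 *	rmap = alloc_cpu_rmap(n_queues, GFP_KERNEL);
 *	if (!rmap)
 *		return -ENOMEM;
 *	...
 *	cpu_rmap_put(rmap);	// drop the allocation's reference
 */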

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);

	kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
	kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
	return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
				const struct cpumask *mask, u16 dist)
{
	int neigh;

	for_each_cpu(neigh, mask) {
		if (rmap->near[cpu].dist > dist &&
		    rmap->near[neigh].dist <= dist) {
			rmap->near[cpu].index = rmap->near[neigh].index;
			rmap->near[cpu].dist = dist;
			return true;
		}
	}
	return false;
}
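
/* Worked example for cpu_rmap_copy_neigh(), with illustrative CPU
 * numbers: if CPU 4 is still at CPU_RMAP_DIST_INF and its SMT sibling
 * CPU 5 is at distance 0 (an object is directly affine to CPU 5),
 * then a call over the thread mask with dist == 1 copies CPU 5's
 * object index to CPU 4 and records distance 1.
 */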

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		pr_info("cpu %d -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
	u16 index;

	BUG_ON(rmap->used >= rmap->size);
	index = rmap->used++;
	rmap->obj[index] = obj;
	return index;
}
EXPORT_SYMBOL(cpu_rmap_add);
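
/* Illustrative sketch: objects are registered once, and the nearest
 * object for a CPU can then be fetched with cpu_rmap_lookup_obj()
 * from <linux/cpu_rmap.h>.  "queues" and "n_queues" are hypothetical:
 *
 *	unsigned int i;
 *
 *	for (i = 0; i < n_queues; i++)
 *		queues[i].rmap_index = cpu_rmap_add(rmap, &queues[i]);
 *
 *	// later, e.g. in a hot path:
 *	struct queue *q = cpu_rmap_lookup_obj(rmap, raw_smp_processor_id());
 */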

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 *
 * Returns 0 on success, or -ENOMEM if a temporary cpumask cannot be
 * allocated.
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
		    const struct cpumask *affinity)
{
	cpumask_var_t update_mask;
	unsigned int cpu;

	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
		return -ENOMEM;

	/* Invalidate distance for all CPUs for which this used to be
	 * the nearest object.  Mark those CPUs for update.
	 */
	for_each_online_cpu(cpu) {
		if (rmap->near[cpu].index == index) {
			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
			cpumask_set_cpu(cpu, update_mask);
		}
	}

	debug_print_rmap(rmap, "after invalidating old distances");

	/* Set distance to 0 for all CPUs in the new affinity mask.
	 * Mark all CPUs within their NUMA nodes for update.
	 */
	for_each_cpu(cpu, affinity) {
		rmap->near[cpu].index = index;
		rmap->near[cpu].dist = 0;
		cpumask_or(update_mask, update_mask,
			   cpumask_of_node(cpu_to_node(cpu)));
	}

	debug_print_rmap(rmap, "after updating neighbours");

	/* Update distances based on topology */
	for_each_cpu(cpu, update_mask) {
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_thread_cpumask(cpu), 1))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_core_cpumask(cpu), 2))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					cpumask_of_node(cpu_to_node(cpu)), 3))
			continue;
		/* We could continue into NUMA node distances, but for now
		 * we give up.
		 */
	}

	debug_print_rmap(rmap, "after copying neighbours");

	free_cpumask_var(update_mask);
	return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
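
/* Illustrative sketch: if object 0 becomes affine to CPUs 2-3, the
 * caller rebuilds the mapping; CPUs outside the mask are then steered
 * to their topologically nearest object.  The CPU numbers and object
 * index are hypothetical:
 *
 *	cpumask_var_t mask;
 *
 *	if (zalloc_cpumask_var(&mask, GFP_KERNEL)) {
 *		cpumask_set_cpu(2, mask);
 *		cpumask_set_cpu(3, mask);
 *		cpu_rmap_update(rmap, 0, mask);
 *		free_cpumask_var(mask);
 *	}
 */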

#ifdef CONFIG_GENERIC_HARDIRQS

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
	struct irq_affinity_notify notify;
	struct cpu_rmap *rmap;
	u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
	struct irq_glue *glue;
	u16 index;

	if (!rmap)
		return;

	for (index = 0; index < rmap->used; index++) {
		glue = rmap->obj[index];
		/* Slots may have been cleared by a failed irq_cpu_rmap_add() */
		if (glue)
			irq_set_affinity_notifier(glue->notify.irq, NULL);
	}

	cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
	struct irq_glue *glue =
		container_of(notify, struct irq_glue, notify);
	int rc;

	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
	if (rc)
		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
	struct irq_glue *glue =
		container_of(ref, struct irq_glue, notify.kref);

	cpu_rmap_put(glue->rmap);
	kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
	int rc;

	if (!glue)
		return -ENOMEM;

	glue->notify.notify = irq_cpu_rmap_notify;
	glue->notify.release = irq_cpu_rmap_release;
	glue->rmap = rmap;
	cpu_rmap_get(rmap);
	glue->index = cpu_rmap_add(rmap, glue);

	rc = irq_set_affinity_notifier(irq, &glue->notify);
	if (rc) {
		/* Clear the stale slot so that free_irq_cpu_rmap() does
		 * not dereference the freed glue.
		 */
		rmap->obj[glue->index] = NULL;
		cpu_rmap_put(glue->rmap);
		kfree(glue);
	}
	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
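
/* Illustrative sketch of the IRQ lifecycle; alloc_irq_cpu_rmap() is
 * the GFP_KERNEL convenience wrapper from <linux/cpu_rmap.h>, and
 * "n_irqs"/"irqs[]" are hypothetical:
 *
 *	struct cpu_rmap *rmap = alloc_irq_cpu_rmap(n_irqs);
 *	unsigned int i;
 *	int rc;
 *
 *	if (!rmap)
 *		return -ENOMEM;
 *	for (i = 0; i < n_irqs; i++) {
 *		rc = irq_cpu_rmap_add(rmap, irqs[i]);
 *		if (rc)
 *			break;
 *	}
 *	...
 *	free_irq_cpu_rmap(rmap);	// also unregisters the notifiers
 */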

#endif /* CONFIG_GENERIC_HARDIRQS */