numa.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  1. /*
  2. * acpi_numa.c - ACPI NUMA support
  3. *
  4. * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
  5. *
  6. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  19. *
  20. */
  21. #define pr_fmt(fmt) "ACPI: " fmt
  22. #include <linux/module.h>
  23. #include <linux/init.h>
  24. #include <linux/kernel.h>
  25. #include <linux/types.h>
  26. #include <linux/errno.h>
  27. #include <linux/acpi.h>
  28. #include <linux/bootmem.h>
  29. #include <linux/memblock.h>
  30. #include <linux/numa.h>
  31. #include <linux/nodemask.h>
  32. #include <linux/topology.h>
  33. static nodemask_t nodes_found_map = NODE_MASK_NONE;
  34. /* maps to convert between proximity domain and logical node ID */
  35. static int pxm_to_node_map[MAX_PXM_DOMAINS]
  36. = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE };
  37. static int node_to_pxm_map[MAX_NUMNODES]
  38. = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
  39. unsigned char acpi_srat_revision __initdata;
  40. int acpi_numa __initdata;
  41. int pxm_to_node(int pxm)
  42. {
  43. if (pxm < 0)
  44. return NUMA_NO_NODE;
  45. return pxm_to_node_map[pxm];
  46. }
  47. int node_to_pxm(int node)
  48. {
  49. if (node < 0)
  50. return PXM_INVAL;
  51. return node_to_pxm_map[node];
  52. }
  53. static void __acpi_map_pxm_to_node(int pxm, int node)
  54. {
  55. if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm])
  56. pxm_to_node_map[pxm] = node;
  57. if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node])
  58. node_to_pxm_map[node] = pxm;
  59. }
  60. int acpi_map_pxm_to_node(int pxm)
  61. {
  62. int node;
  63. if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
  64. return NUMA_NO_NODE;
  65. node = pxm_to_node_map[pxm];
  66. if (node == NUMA_NO_NODE) {
  67. if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
  68. return NUMA_NO_NODE;
  69. node = first_unset_node(nodes_found_map);
  70. __acpi_map_pxm_to_node(pxm, node);
  71. node_set(node, nodes_found_map);
  72. }
  73. return node;
  74. }
  75. /**
  76. * acpi_map_pxm_to_online_node - Map proximity ID to online node
  77. * @pxm: ACPI proximity ID
  78. *
  79. * This is similar to acpi_map_pxm_to_node(), but always returns an online
  80. * node. When the mapped node from a given proximity ID is offline, it
  81. * looks up the node distance table and returns the nearest online node.
  82. *
  83. * ACPI device drivers, which are called after the NUMA initialization has
  84. * completed in the kernel, can call this interface to obtain their device
  85. * NUMA topology from ACPI tables. Such drivers do not have to deal with
  86. * offline nodes. A node may be offline when a device proximity ID is
  87. * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
  88. * "numa=off" on x86.
  89. */
  90. int acpi_map_pxm_to_online_node(int pxm)
  91. {
  92. int node, min_node;
  93. node = acpi_map_pxm_to_node(pxm);
  94. if (node == NUMA_NO_NODE)
  95. node = 0;
  96. min_node = node;
  97. if (!node_online(node)) {
  98. int min_dist = INT_MAX, dist, n;
  99. for_each_online_node(n) {
  100. dist = node_distance(node, n);
  101. if (dist < min_dist) {
  102. min_dist = dist;
  103. min_node = n;
  104. }
  105. }
  106. }
  107. return min_node;
  108. }
  109. EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
  110. static void __init
  111. acpi_table_print_srat_entry(struct acpi_subtable_header *header)
  112. {
  113. switch (header->type) {
  114. case ACPI_SRAT_TYPE_CPU_AFFINITY:
  115. {
  116. struct acpi_srat_cpu_affinity *p =
  117. (struct acpi_srat_cpu_affinity *)header;
  118. pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
  119. p->apic_id, p->local_sapic_eid,
  120. p->proximity_domain_lo,
  121. (p->flags & ACPI_SRAT_CPU_ENABLED) ?
  122. "enabled" : "disabled");
  123. }
  124. break;
  125. case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
  126. {
  127. struct acpi_srat_mem_affinity *p =
  128. (struct acpi_srat_mem_affinity *)header;
  129. pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
  130. (unsigned long)p->base_address,
  131. (unsigned long)p->length,
  132. p->proximity_domain,
  133. (p->flags & ACPI_SRAT_MEM_ENABLED) ?
  134. "enabled" : "disabled",
  135. (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
  136. " hot-pluggable" : "",
  137. (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ?
  138. " non-volatile" : "");
  139. }
  140. break;
  141. case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
  142. {
  143. struct acpi_srat_x2apic_cpu_affinity *p =
  144. (struct acpi_srat_x2apic_cpu_affinity *)header;
  145. pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
  146. p->apic_id,
  147. p->proximity_domain,
  148. (p->flags & ACPI_SRAT_CPU_ENABLED) ?
  149. "enabled" : "disabled");
  150. }
  151. break;
  152. case ACPI_SRAT_TYPE_GICC_AFFINITY:
  153. {
  154. struct acpi_srat_gicc_affinity *p =
  155. (struct acpi_srat_gicc_affinity *)header;
  156. pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
  157. p->acpi_processor_uid,
  158. p->proximity_domain,
  159. (p->flags & ACPI_SRAT_GICC_ENABLED) ?
  160. "enabled" : "disabled");
  161. }
  162. break;
  163. default:
  164. pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
  165. header->type);
  166. break;
  167. }
  168. }
  169. /*
  170. * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
  171. * up the NUMA heuristics which wants the local node to have a smaller
  172. * distance than the others.
  173. * Do some quick checks here and only use the SLIT if it passes.
  174. */
  175. static int __init slit_valid(struct acpi_table_slit *slit)
  176. {
  177. int i, j;
  178. int d = slit->locality_count;
  179. for (i = 0; i < d; i++) {
  180. for (j = 0; j < d; j++) {
  181. u8 val = slit->entry[d*i + j];
  182. if (i == j) {
  183. if (val != LOCAL_DISTANCE)
  184. return 0;
  185. } else if (val <= LOCAL_DISTANCE)
  186. return 0;
  187. }
  188. }
  189. return 1;
  190. }
  191. void __init bad_srat(void)
  192. {
  193. pr_err("SRAT: SRAT not used.\n");
  194. acpi_numa = -1;
  195. }
  196. int __init srat_disabled(void)
  197. {
  198. return acpi_numa < 0;
  199. }
  200. #if defined(CONFIG_X86) || defined(CONFIG_ARM64)
  201. /*
  202. * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
  203. * I/O localities since SRAT does not list them. I/O localities are
  204. * not supported at this point.
  205. */
  206. void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
  207. {
  208. int i, j;
  209. for (i = 0; i < slit->locality_count; i++) {
  210. const int from_node = pxm_to_node(i);
  211. if (from_node == NUMA_NO_NODE)
  212. continue;
  213. for (j = 0; j < slit->locality_count; j++) {
  214. const int to_node = pxm_to_node(j);
  215. if (to_node == NUMA_NO_NODE)
  216. continue;
  217. numa_set_distance(from_node, to_node,
  218. slit->entry[slit->locality_count * i + j]);
  219. }
  220. }
  221. }
  222. /*
  223. * Default callback for parsing of the Proximity Domain <-> Memory
  224. * Area mappings
  225. */
  226. int __init
  227. acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
  228. {
  229. u64 start, end;
  230. u32 hotpluggable;
  231. int node, pxm;
  232. if (srat_disabled())
  233. goto out_err;
  234. if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) {
  235. pr_err("SRAT: Unexpected header length: %d\n",
  236. ma->header.length);
  237. goto out_err_bad_srat;
  238. }
  239. if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
  240. goto out_err;
  241. hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
  242. if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
  243. goto out_err;
  244. start = ma->base_address;
  245. end = start + ma->length;
  246. pxm = ma->proximity_domain;
  247. if (acpi_srat_revision <= 1)
  248. pxm &= 0xff;
  249. node = acpi_map_pxm_to_node(pxm);
  250. if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
  251. pr_err("SRAT: Too many proximity domains.\n");
  252. goto out_err_bad_srat;
  253. }
  254. if (numa_add_memblk(node, start, end) < 0) {
  255. pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
  256. node, (unsigned long long) start,
  257. (unsigned long long) end - 1);
  258. goto out_err_bad_srat;
  259. }
  260. node_set(node, numa_nodes_parsed);
  261. pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
  262. node, pxm,
  263. (unsigned long long) start, (unsigned long long) end - 1,
  264. hotpluggable ? " hotplug" : "",
  265. ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
  266. /* Mark hotplug range in memblock. */
  267. if (hotpluggable && memblock_mark_hotplug(start, ma->length))
  268. pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
  269. (unsigned long long)start, (unsigned long long)end - 1);
  270. max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
  271. return 0;
  272. out_err_bad_srat:
  273. bad_srat();
  274. out_err:
  275. return -EINVAL;
  276. }
  277. #endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
  278. static int __init acpi_parse_slit(struct acpi_table_header *table)
  279. {
  280. struct acpi_table_slit *slit = (struct acpi_table_slit *)table;
  281. if (!slit_valid(slit)) {
  282. pr_info("SLIT table looks invalid. Not used.\n");
  283. return -EINVAL;
  284. }
  285. acpi_numa_slit_init(slit);
  286. return 0;
  287. }
  288. void __init __weak
  289. acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
  290. {
  291. pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id);
  292. }
  293. static int __init
  294. acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
  295. const unsigned long end)
  296. {
  297. struct acpi_srat_x2apic_cpu_affinity *processor_affinity;
  298. processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header;
  299. if (!processor_affinity)
  300. return -EINVAL;
  301. acpi_table_print_srat_entry(header);
  302. /* let architecture-dependent part to do it */
  303. acpi_numa_x2apic_affinity_init(processor_affinity);
  304. return 0;
  305. }
  306. static int __init
  307. acpi_parse_processor_affinity(struct acpi_subtable_header *header,
  308. const unsigned long end)
  309. {
  310. struct acpi_srat_cpu_affinity *processor_affinity;
  311. processor_affinity = (struct acpi_srat_cpu_affinity *)header;
  312. if (!processor_affinity)
  313. return -EINVAL;
  314. acpi_table_print_srat_entry(header);
  315. /* let architecture-dependent part to do it */
  316. acpi_numa_processor_affinity_init(processor_affinity);
  317. return 0;
  318. }
  319. static int __init
  320. acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
  321. const unsigned long end)
  322. {
  323. struct acpi_srat_gicc_affinity *processor_affinity;
  324. processor_affinity = (struct acpi_srat_gicc_affinity *)header;
  325. if (!processor_affinity)
  326. return -EINVAL;
  327. acpi_table_print_srat_entry(header);
  328. /* let architecture-dependent part to do it */
  329. acpi_numa_gicc_affinity_init(processor_affinity);
  330. return 0;
  331. }
  332. static int __initdata parsed_numa_memblks;
  333. static int __init
  334. acpi_parse_memory_affinity(struct acpi_subtable_header * header,
  335. const unsigned long end)
  336. {
  337. struct acpi_srat_mem_affinity *memory_affinity;
  338. memory_affinity = (struct acpi_srat_mem_affinity *)header;
  339. if (!memory_affinity)
  340. return -EINVAL;
  341. acpi_table_print_srat_entry(header);
  342. /* let architecture-dependent part to do it */
  343. if (!acpi_numa_memory_affinity_init(memory_affinity))
  344. parsed_numa_memblks++;
  345. return 0;
  346. }
  347. static int __init acpi_parse_srat(struct acpi_table_header *table)
  348. {
  349. struct acpi_table_srat *srat = (struct acpi_table_srat *)table;
  350. acpi_srat_revision = srat->header.revision;
  351. /* Real work done in acpi_table_parse_srat below. */
  352. return 0;
  353. }
  354. static int __init
  355. acpi_table_parse_srat(enum acpi_srat_type id,
  356. acpi_tbl_entry_handler handler, unsigned int max_entries)
  357. {
  358. return acpi_table_parse_entries(ACPI_SIG_SRAT,
  359. sizeof(struct acpi_table_srat), id,
  360. handler, max_entries);
  361. }
  362. int __init acpi_numa_init(void)
  363. {
  364. int cnt = 0;
  365. if (acpi_disabled)
  366. return -EINVAL;
  367. /*
  368. * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
  369. * SRAT cpu entries could have different order with that in MADT.
  370. * So go over all cpu entries in SRAT to get apicid to node mapping.
  371. */
  372. /* SRAT: Static Resource Affinity Table */
  373. if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
  374. struct acpi_subtable_proc srat_proc[3];
  375. memset(srat_proc, 0, sizeof(srat_proc));
  376. srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
  377. srat_proc[0].handler = acpi_parse_processor_affinity;
  378. srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
  379. srat_proc[1].handler = acpi_parse_x2apic_affinity;
  380. srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
  381. srat_proc[2].handler = acpi_parse_gicc_affinity;
  382. acpi_table_parse_entries_array(ACPI_SIG_SRAT,
  383. sizeof(struct acpi_table_srat),
  384. srat_proc, ARRAY_SIZE(srat_proc), 0);
  385. cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
  386. acpi_parse_memory_affinity,
  387. NR_NODE_MEMBLKS);
  388. }
  389. /* SLIT: System Locality Information Table */
  390. acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
  391. if (cnt < 0)
  392. return cnt;
  393. else if (!parsed_numa_memblks)
  394. return -ENOENT;
  395. return 0;
  396. }
  397. static int acpi_get_pxm(acpi_handle h)
  398. {
  399. unsigned long long pxm;
  400. acpi_status status;
  401. acpi_handle handle;
  402. acpi_handle phandle = h;
  403. do {
  404. handle = phandle;
  405. status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
  406. if (ACPI_SUCCESS(status))
  407. return pxm;
  408. status = acpi_get_parent(handle, &phandle);
  409. } while (ACPI_SUCCESS(status));
  410. return -1;
  411. }
  412. int acpi_get_node(acpi_handle handle)
  413. {
  414. int pxm;
  415. pxm = acpi_get_pxm(handle);
  416. return acpi_map_pxm_to_node(pxm);
  417. }
  418. EXPORT_SYMBOL(acpi_get_node);