123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439 |
- /* cpumap.c: used for optimizing CPU assignment
- *
- * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
- */
- #include <linux/export.h>
- #include <linux/slab.h>
- #include <linux/kernel.h>
- #include <linux/cpumask.h>
- #include <linux/spinlock.h>
- #include <asm/cpudata.h>
- #include "cpumap.h"
- enum {
- CPUINFO_LVL_ROOT = 0,
- CPUINFO_LVL_NODE,
- CPUINFO_LVL_CORE,
- CPUINFO_LVL_PROC,
- CPUINFO_LVL_MAX,
- };
- enum {
- ROVER_NO_OP = 0,
- /* Increment rover every time level is visited */
- ROVER_INC_ON_VISIT = 1 << 0,
- /* Increment parent's rover every time rover wraps around */
- ROVER_INC_PARENT_ON_LOOP = 1 << 1,
- };
- struct cpuinfo_node {
- int id;
- int level;
- int num_cpus; /* Number of CPUs in this hierarchy */
- int parent_index;
- int child_start; /* Array index of the first child node */
- int child_end; /* Array index of the last child node */
- int rover; /* Child node iterator */
- };
- struct cpuinfo_level {
- int start_index; /* Index of first node of a level in a cpuinfo tree */
- int end_index; /* Index of last node of a level in a cpuinfo tree */
- int num_nodes; /* Number of nodes in a level in a cpuinfo tree */
- };
- struct cpuinfo_tree {
- int total_nodes;
- /* Offsets into nodes[] for each level of the tree */
- struct cpuinfo_level level[CPUINFO_LVL_MAX];
- struct cpuinfo_node nodes[0];
- };
- static struct cpuinfo_tree *cpuinfo_tree;
- static u16 cpu_distribution_map[NR_CPUS];
- static DEFINE_SPINLOCK(cpu_map_lock);
- /* Niagara optimized cpuinfo tree traversal. */
- static const int niagara_iterate_method[] = {
- [CPUINFO_LVL_ROOT] = ROVER_NO_OP,
- /* Strands (or virtual CPUs) within a core may not run concurrently
- * on the Niagara, as instruction pipeline(s) are shared. Distribute
- * work to strands in different cores first for better concurrency.
- * Go to next NUMA node when all cores are used.
- */
- [CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
- /* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
- * a proc_id represents an instruction pipeline. Distribute work to
- * strands in different proc_id groups if the core has multiple
- * instruction pipelines (e.g. the Niagara 2/2+ has two).
- */
- [CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,
- /* Pick the next strand in the proc_id group. */
- [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
- };
- /* Generic cpuinfo tree traversal. Distribute work round robin across NUMA
- * nodes.
- */
- static const int generic_iterate_method[] = {
- [CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
- [CPUINFO_LVL_NODE] = ROVER_NO_OP,
- [CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
- [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
- };
- static int cpuinfo_id(int cpu, int level)
- {
- int id;
- switch (level) {
- case CPUINFO_LVL_ROOT:
- id = 0;
- break;
- case CPUINFO_LVL_NODE:
- id = cpu_to_node(cpu);
- break;
- case CPUINFO_LVL_CORE:
- id = cpu_data(cpu).core_id;
- break;
- case CPUINFO_LVL_PROC:
- id = cpu_data(cpu).proc_id;
- break;
- default:
- id = -EINVAL;
- }
- return id;
- }
- /*
- * Enumerate the CPU information in __cpu_data to determine the start index,
- * end index, and number of nodes for each level in the cpuinfo tree. The
- * total number of cpuinfo nodes required to build the tree is returned.
- */
- static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
- {
- int prev_id[CPUINFO_LVL_MAX];
- int i, n, num_nodes;
- for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
- struct cpuinfo_level *lv = &tree_level[i];
- prev_id[i] = -1;
- lv->start_index = lv->end_index = lv->num_nodes = 0;
- }
- num_nodes = 1; /* Include the root node */
- for (i = 0; i < num_possible_cpus(); i++) {
- if (!cpu_online(i))
- continue;
- n = cpuinfo_id(i, CPUINFO_LVL_NODE);
- if (n > prev_id[CPUINFO_LVL_NODE]) {
- tree_level[CPUINFO_LVL_NODE].num_nodes++;
- prev_id[CPUINFO_LVL_NODE] = n;
- num_nodes++;
- }
- n = cpuinfo_id(i, CPUINFO_LVL_CORE);
- if (n > prev_id[CPUINFO_LVL_CORE]) {
- tree_level[CPUINFO_LVL_CORE].num_nodes++;
- prev_id[CPUINFO_LVL_CORE] = n;
- num_nodes++;
- }
- n = cpuinfo_id(i, CPUINFO_LVL_PROC);
- if (n > prev_id[CPUINFO_LVL_PROC]) {
- tree_level[CPUINFO_LVL_PROC].num_nodes++;
- prev_id[CPUINFO_LVL_PROC] = n;
- num_nodes++;
- }
- }
- tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;
- n = tree_level[CPUINFO_LVL_NODE].num_nodes;
- tree_level[CPUINFO_LVL_NODE].start_index = 1;
- tree_level[CPUINFO_LVL_NODE].end_index = n;
- n++;
- tree_level[CPUINFO_LVL_CORE].start_index = n;
- n += tree_level[CPUINFO_LVL_CORE].num_nodes;
- tree_level[CPUINFO_LVL_CORE].end_index = n - 1;
- tree_level[CPUINFO_LVL_PROC].start_index = n;
- n += tree_level[CPUINFO_LVL_PROC].num_nodes;
- tree_level[CPUINFO_LVL_PROC].end_index = n - 1;
- return num_nodes;
- }
- /* Build a tree representation of the CPU hierarchy using the per CPU
- * information in __cpu_data. Entries in __cpu_data[0..NR_CPUS] are
- * assumed to be sorted in ascending order based on node, core_id, and
- * proc_id (in order of significance).
- */
- static struct cpuinfo_tree *build_cpuinfo_tree(void)
- {
- struct cpuinfo_tree *new_tree;
- struct cpuinfo_node *node;
- struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
- int num_cpus[CPUINFO_LVL_MAX];
- int level_rover[CPUINFO_LVL_MAX];
- int prev_id[CPUINFO_LVL_MAX];
- int n, id, cpu, prev_cpu, last_cpu, level;
- n = enumerate_cpuinfo_nodes(tmp_level);
- new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
- (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
- if (!new_tree)
- return NULL;
- new_tree->total_nodes = n;
- memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
- prev_cpu = cpu = cpumask_first(cpu_online_mask);
- /* Initialize all levels in the tree with the first CPU */
- for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
- n = new_tree->level[level].start_index;
- level_rover[level] = n;
- node = &new_tree->nodes[n];
- id = cpuinfo_id(cpu, level);
- if (unlikely(id < 0)) {
- kfree(new_tree);
- return NULL;
- }
- node->id = id;
- node->level = level;
- node->num_cpus = 1;
- node->parent_index = (level > CPUINFO_LVL_ROOT)
- ? new_tree->level[level - 1].start_index : -1;
- node->child_start = node->child_end = node->rover =
- (level == CPUINFO_LVL_PROC)
- ? cpu : new_tree->level[level + 1].start_index;
- prev_id[level] = node->id;
- num_cpus[level] = 1;
- }
- for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
- if (cpu_online(last_cpu))
- break;
- }
- while (++cpu <= last_cpu) {
- if (!cpu_online(cpu))
- continue;
- for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
- level--) {
- id = cpuinfo_id(cpu, level);
- if (unlikely(id < 0)) {
- kfree(new_tree);
- return NULL;
- }
- if ((id != prev_id[level]) || (cpu == last_cpu)) {
- prev_id[level] = id;
- node = &new_tree->nodes[level_rover[level]];
- node->num_cpus = num_cpus[level];
- num_cpus[level] = 1;
- if (cpu == last_cpu)
- node->num_cpus++;
- /* Connect tree node to parent */
- if (level == CPUINFO_LVL_ROOT)
- node->parent_index = -1;
- else
- node->parent_index =
- level_rover[level - 1];
- if (level == CPUINFO_LVL_PROC) {
- node->child_end =
- (cpu == last_cpu) ? cpu : prev_cpu;
- } else {
- node->child_end =
- level_rover[level + 1] - 1;
- }
- /* Initialize the next node in the same level */
- n = ++level_rover[level];
- if (n <= new_tree->level[level].end_index) {
- node = &new_tree->nodes[n];
- node->id = id;
- node->level = level;
- /* Connect node to child */
- node->child_start = node->child_end =
- node->rover =
- (level == CPUINFO_LVL_PROC)
- ? cpu : level_rover[level + 1];
- }
- } else
- num_cpus[level]++;
- }
- prev_cpu = cpu;
- }
- return new_tree;
- }
- static void increment_rover(struct cpuinfo_tree *t, int node_index,
- int root_index, const int *rover_inc_table)
- {
- struct cpuinfo_node *node = &t->nodes[node_index];
- int top_level, level;
- top_level = t->nodes[root_index].level;
- for (level = node->level; level >= top_level; level--) {
- node->rover++;
- if (node->rover <= node->child_end)
- return;
- node->rover = node->child_start;
- /* If parent's rover does not need to be adjusted, stop here. */
- if ((level == top_level) ||
- !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
- return;
- node = &t->nodes[node->parent_index];
- }
- }
- static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
- {
- const int *rover_inc_table;
- int level, new_index, index = root_index;
- switch (sun4v_chip_type) {
- case SUN4V_CHIP_NIAGARA1:
- case SUN4V_CHIP_NIAGARA2:
- case SUN4V_CHIP_NIAGARA3:
- case SUN4V_CHIP_NIAGARA4:
- case SUN4V_CHIP_NIAGARA5:
- case SUN4V_CHIP_SPARC_M6:
- case SUN4V_CHIP_SPARC_M7:
- case SUN4V_CHIP_SPARC_SN:
- case SUN4V_CHIP_SPARC64X:
- rover_inc_table = niagara_iterate_method;
- break;
- default:
- rover_inc_table = generic_iterate_method;
- }
- for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
- level++) {
- new_index = t->nodes[index].rover;
- if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
- increment_rover(t, index, root_index, rover_inc_table);
- index = new_index;
- }
- return index;
- }
- static void _cpu_map_rebuild(void)
- {
- int i;
- if (cpuinfo_tree) {
- kfree(cpuinfo_tree);
- cpuinfo_tree = NULL;
- }
- cpuinfo_tree = build_cpuinfo_tree();
- if (!cpuinfo_tree)
- return;
- /* Build CPU distribution map that spans all online CPUs. No need
- * to check if the CPU is online, as that is done when the cpuinfo
- * tree is being built.
- */
- for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
- cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
- }
- /* Fallback if the cpuinfo tree could not be built. CPU mapping is linear
- * round robin.
- */
- static int simple_map_to_cpu(unsigned int index)
- {
- int i, end, cpu_rover;
- cpu_rover = 0;
- end = index % num_online_cpus();
- for (i = 0; i < num_possible_cpus(); i++) {
- if (cpu_online(cpu_rover)) {
- if (cpu_rover >= end)
- return cpu_rover;
- cpu_rover++;
- }
- }
- /* Impossible, since num_online_cpus() <= num_possible_cpus() */
- return cpumask_first(cpu_online_mask);
- }
- static int _map_to_cpu(unsigned int index)
- {
- struct cpuinfo_node *root_node;
- if (unlikely(!cpuinfo_tree)) {
- _cpu_map_rebuild();
- if (!cpuinfo_tree)
- return simple_map_to_cpu(index);
- }
- root_node = &cpuinfo_tree->nodes[0];
- #ifdef CONFIG_HOTPLUG_CPU
- if (unlikely(root_node->num_cpus != num_online_cpus())) {
- _cpu_map_rebuild();
- if (!cpuinfo_tree)
- return simple_map_to_cpu(index);
- }
- #endif
- return cpu_distribution_map[index % root_node->num_cpus];
- }
- int map_to_cpu(unsigned int index)
- {
- int mapped_cpu;
- unsigned long flag;
- spin_lock_irqsave(&cpu_map_lock, flag);
- mapped_cpu = _map_to_cpu(index);
- #ifdef CONFIG_HOTPLUG_CPU
- while (unlikely(!cpu_online(mapped_cpu)))
- mapped_cpu = _map_to_cpu(index);
- #endif
- spin_unlock_irqrestore(&cpu_map_lock, flag);
- return mapped_cpu;
- }
- EXPORT_SYMBOL(map_to_cpu);
- void cpu_map_rebuild(void)
- {
- unsigned long flag;
- spin_lock_irqsave(&cpu_map_lock, flag);
- _cpu_map_rebuild();
- spin_unlock_irqrestore(&cpu_map_lock, flag);
- }
|