Thu, 01 Jun 2017 20:42:49 -0400
8175813: PPC64: "mbind: Invalid argument" when -XX:+UseNUMA is used
Reviewed-by: dholmes, zgu
src/os/linux/vm/os_linux.cpp | file | annotate | diff | comparison | revisions
src/os/linux/vm/os_linux.hpp | file | annotate | diff | comparison | revisions
1.1 --- a/src/os/linux/vm/os_linux.cpp Tue May 30 08:17:41 2017 +0000 1.2 +++ b/src/os/linux/vm/os_linux.cpp Thu Jun 01 20:42:49 2017 -0400 1.3 @@ -2736,8 +2736,9 @@ 1.4 bool os::numa_topology_changed() { return false; } 1.5 1.6 size_t os::numa_get_groups_num() { 1.7 - int max_node = Linux::numa_max_node(); 1.8 - return max_node > 0 ? max_node + 1 : 1; 1.9 + // Return just the number of nodes in which it's possible to allocate memory 1.10 + // (in numa terminology, configured nodes). 1.11 + return Linux::numa_num_configured_nodes(); 1.12 } 1.13 1.14 int os::numa_get_group_id() { 1.15 @@ -2751,11 +2752,33 @@ 1.16 return 0; 1.17 } 1.18 1.19 +int os::Linux::get_existing_num_nodes() { 1.20 + size_t node; 1.21 + size_t highest_node_number = Linux::numa_max_node(); 1.22 + int num_nodes = 0; 1.23 + 1.24 + // Get the total number of nodes in the system including nodes without memory. 1.25 + for (node = 0; node <= highest_node_number; node++) { 1.26 + if (isnode_in_existing_nodes(node)) { 1.27 + num_nodes++; 1.28 + } 1.29 + } 1.30 + return num_nodes; 1.31 +} 1.32 + 1.33 size_t os::numa_get_leaf_groups(int *ids, size_t size) { 1.34 - for (size_t i = 0; i < size; i++) { 1.35 - ids[i] = i; 1.36 - } 1.37 - return size; 1.38 + size_t highest_node_number = Linux::numa_max_node(); 1.39 + size_t i = 0; 1.40 + 1.41 + // Map all node ids in which is possible to allocate memory. Also nodes are 1.42 + // not always consecutively available, i.e. available from 0 to the highest 1.43 + // node number. 
1.44 + for (size_t node = 0; node <= highest_node_number; node++) { 1.45 + if (Linux::isnode_in_configured_nodes(node)) { 1.46 + ids[i++] = node; 1.47 + } 1.48 + } 1.49 + return i; 1.50 } 1.51 1.52 bool os::get_page_info(char *start, page_info* info) { 1.53 @@ -2825,18 +2848,28 @@ 1.54 libnuma_dlsym(handle, "numa_node_to_cpus"))); 1.55 set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t, 1.56 libnuma_dlsym(handle, "numa_max_node"))); 1.57 + set_numa_num_configured_nodes(CAST_TO_FN_PTR(numa_num_configured_nodes_func_t, 1.58 + libnuma_dlsym(handle, "numa_num_configured_nodes"))); 1.59 set_numa_available(CAST_TO_FN_PTR(numa_available_func_t, 1.60 libnuma_dlsym(handle, "numa_available"))); 1.61 set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t, 1.62 libnuma_dlsym(handle, "numa_tonode_memory"))); 1.63 set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t, 1.64 - libnuma_dlsym(handle, "numa_interleave_memory"))); 1.65 + libnuma_dlsym(handle, "numa_interleave_memory"))); 1.66 set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t, 1.67 - libnuma_dlsym(handle, "numa_set_bind_policy"))); 1.68 - 1.69 + libnuma_dlsym(handle, "numa_set_bind_policy"))); 1.70 + set_numa_bitmask_isbitset(CAST_TO_FN_PTR(numa_bitmask_isbitset_func_t, 1.71 + libnuma_dlsym(handle, "numa_bitmask_isbitset"))); 1.72 + set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t, 1.73 + libnuma_dlsym(handle, "numa_distance"))); 1.74 1.75 if (numa_available() != -1) { 1.76 set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes")); 1.77 + set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr")); 1.78 + set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr")); 1.79 + // Create an index -> node mapping, since nodes are not always consecutive 1.80 + _nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true); 1.81 + rebuild_nindex_to_node_map(); 1.82 // Create a cpu -> node 
mapping 1.83 _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true); 1.84 rebuild_cpu_to_node_map(); 1.85 @@ -2847,6 +2880,17 @@ 1.86 return false; 1.87 } 1.88 1.89 +void os::Linux::rebuild_nindex_to_node_map() { 1.90 + int highest_node_number = Linux::numa_max_node(); 1.91 + 1.92 + nindex_to_node()->clear(); 1.93 + for (int node = 0; node <= highest_node_number; node++) { 1.94 + if (Linux::isnode_in_existing_nodes(node)) { 1.95 + nindex_to_node()->append(node); 1.96 + } 1.97 + } 1.98 +} 1.99 + 1.100 // rebuild_cpu_to_node_map() constructs a table mapping cpud id to node id. 1.101 // The table is later used in get_node_by_cpu(). 1.102 void os::Linux::rebuild_cpu_to_node_map() { 1.103 @@ -2866,16 +2910,46 @@ 1.104 1.105 cpu_to_node()->clear(); 1.106 cpu_to_node()->at_grow(cpu_num - 1); 1.107 - size_t node_num = numa_get_groups_num(); 1.108 - 1.109 + 1.110 + size_t node_num = get_existing_num_nodes(); 1.111 + 1.112 + int distance = 0; 1.113 + int closest_distance = INT_MAX; 1.114 + int closest_node = 0; 1.115 unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal); 1.116 for (size_t i = 0; i < node_num; i++) { 1.117 - if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) { 1.118 + // Check if node is configured (not a memory-less node). If it is not, find 1.119 + // the closest configured node. 1.120 + if (!isnode_in_configured_nodes(nindex_to_node()->at(i))) { 1.121 + closest_distance = INT_MAX; 1.122 + // Check distance from all remaining nodes in the system. Ignore distance 1.123 + // from itself and from another non-configured node. 1.124 + for (size_t m = 0; m < node_num; m++) { 1.125 + if (m != i && isnode_in_configured_nodes(nindex_to_node()->at(m))) { 1.126 + distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m)); 1.127 + // If a closest node is found, update. 
There is always at least one 1.128 + // configured node in the system so there is always at least one node 1.129 + // close. 1.130 + if (distance != 0 && distance < closest_distance) { 1.131 + closest_distance = distance; 1.132 + closest_node = nindex_to_node()->at(m); 1.133 + } 1.134 + } 1.135 + } 1.136 + } else { 1.137 + // Current node is already a configured node. 1.138 + closest_node = nindex_to_node()->at(i); 1.139 + } 1.140 + 1.141 + // Get cpus from the original node and map them to the closest node. If node 1.142 + // is a configured node (not a memory-less node), then original node and 1.143 + // closest node are the same. 1.144 + if (numa_node_to_cpus(nindex_to_node()->at(i), cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) { 1.145 for (size_t j = 0; j < cpu_map_valid_size; j++) { 1.146 if (cpu_map[j] != 0) { 1.147 for (size_t k = 0; k < BitsPerCLong; k++) { 1.148 if (cpu_map[j] & (1UL << k)) { 1.149 - cpu_to_node()->at_put(j * BitsPerCLong + k, i); 1.150 + cpu_to_node()->at_put(j * BitsPerCLong + k, closest_node); 1.151 } 1.152 } 1.153 } 1.154 @@ -2893,14 +2967,20 @@ 1.155 } 1.156 1.157 GrowableArray<int>* os::Linux::_cpu_to_node; 1.158 +GrowableArray<int>* os::Linux::_nindex_to_node; 1.159 os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu; 1.160 os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus; 1.161 os::Linux::numa_max_node_func_t os::Linux::_numa_max_node; 1.162 +os::Linux::numa_num_configured_nodes_func_t os::Linux::_numa_num_configured_nodes; 1.163 os::Linux::numa_available_func_t os::Linux::_numa_available; 1.164 os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory; 1.165 os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory; 1.166 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy; 1.167 +os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset; 1.168 +os::Linux::numa_distance_func_t os::Linux::_numa_distance; 1.169 unsigned long* 
os::Linux::_numa_all_nodes; 1.170 +struct bitmask* os::Linux::_numa_all_nodes_ptr; 1.171 +struct bitmask* os::Linux::_numa_nodes_ptr; 1.172 1.173 bool os::pd_uncommit_memory(char* addr, size_t size) { 1.174 uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
2.1 --- a/src/os/linux/vm/os_linux.hpp Tue May 30 08:17:41 2017 +0000 2.2 +++ b/src/os/linux/vm/os_linux.hpp Thu Jun 01 20:42:49 2017 -0400 2.3 @@ -67,6 +67,7 @@ 2.4 static bool _supports_fast_thread_cpu_time; 2.5 2.6 static GrowableArray<int>* _cpu_to_node; 2.7 + static GrowableArray<int>* _nindex_to_node; 2.8 2.9 protected: 2.10 2.11 @@ -94,7 +95,9 @@ 2.12 static void set_is_floating_stack() { _is_floating_stack = true; } 2.13 2.14 static void rebuild_cpu_to_node_map(); 2.15 + static void rebuild_nindex_to_node_map(); 2.16 static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; } 2.17 + static GrowableArray<int>* nindex_to_node() { return _nindex_to_node; } 2.18 2.19 static size_t find_large_page_size(); 2.20 static size_t setup_large_page_size(); 2.21 @@ -243,28 +246,41 @@ 2.22 typedef int (*sched_getcpu_func_t)(void); 2.23 typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen); 2.24 typedef int (*numa_max_node_func_t)(void); 2.25 + typedef int (*numa_num_configured_nodes_func_t)(void); 2.26 typedef int (*numa_available_func_t)(void); 2.27 typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node); 2.28 typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask); 2.29 typedef void (*numa_set_bind_policy_func_t)(int policy); 2.30 + typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n); 2.31 + typedef int (*numa_distance_func_t)(int node1, int node2); 2.32 2.33 static sched_getcpu_func_t _sched_getcpu; 2.34 static numa_node_to_cpus_func_t _numa_node_to_cpus; 2.35 static numa_max_node_func_t _numa_max_node; 2.36 + static numa_num_configured_nodes_func_t _numa_num_configured_nodes; 2.37 static numa_available_func_t _numa_available; 2.38 static numa_tonode_memory_func_t _numa_tonode_memory; 2.39 static numa_interleave_memory_func_t _numa_interleave_memory; 2.40 static numa_set_bind_policy_func_t _numa_set_bind_policy; 2.41 + static 
numa_bitmask_isbitset_func_t _numa_bitmask_isbitset; 2.42 + static numa_distance_func_t _numa_distance; 2.43 static unsigned long* _numa_all_nodes; 2.44 + static struct bitmask* _numa_all_nodes_ptr; 2.45 + static struct bitmask* _numa_nodes_ptr; 2.46 2.47 static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; } 2.48 static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; } 2.49 static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; } 2.50 + static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; } 2.51 static void set_numa_available(numa_available_func_t func) { _numa_available = func; } 2.52 static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; } 2.53 static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; } 2.54 static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; } 2.55 + static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; } 2.56 + static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; } 2.57 static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; } 2.58 + static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = *ptr; } 2.59 + static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = *ptr; } 2.60 static int sched_getcpu_syscall(void); 2.61 public: 2.62 static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; } 2.63 @@ -272,6 +288,9 @@ 2.64 return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1; 2.65 } 2.66 static int numa_max_node() { return _numa_max_node != NULL ? 
_numa_max_node() : -1; } 2.67 + static int numa_num_configured_nodes() { 2.68 + return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1; 2.69 + } 2.70 static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; } 2.71 static int numa_tonode_memory(void *start, size_t size, int node) { 2.72 return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1; 2.73 @@ -286,7 +305,25 @@ 2.74 _numa_set_bind_policy(policy); 2.75 } 2.76 } 2.77 + static int numa_distance(int node1, int node2) { 2.78 + return _numa_distance != NULL ? _numa_distance(node1, node2) : -1; 2.79 + } 2.80 static int get_node_by_cpu(int cpu_id); 2.81 + static int get_existing_num_nodes(); 2.82 + // Check if numa node is configured (non-zero memory node). 2.83 + static bool isnode_in_configured_nodes(unsigned int n) { 2.84 + if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) { 2.85 + return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n); 2.86 + } else 2.87 + return 0; 2.88 + } 2.89 + // Check if numa node exists in the system (including zero memory nodes). 2.90 + static bool isnode_in_existing_nodes(unsigned int n) { 2.91 + if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) { 2.92 + return _numa_bitmask_isbitset(_numa_nodes_ptr, n); 2.93 + } else 2.94 + return 0; 2.95 + } 2.96 }; 2.97 2.98