1.1 --- a/src/os/linux/vm/os_linux.cpp Mon Jun 12 13:58:09 2017 -0400 1.2 +++ b/src/os/linux/vm/os_linux.cpp Fri Jun 30 23:45:31 2017 -0700 1.3 @@ -2736,8 +2736,9 @@ 1.4 bool os::numa_topology_changed() { return false; } 1.5 1.6 size_t os::numa_get_groups_num() { 1.7 - int max_node = Linux::numa_max_node(); 1.8 - return max_node > 0 ? max_node + 1 : 1; 1.9 + // Return just the number of nodes in which it's possible to allocate memory 1.10 + // (in numa terminology, configured nodes). 1.11 + return Linux::numa_num_configured_nodes(); 1.12 } 1.13 1.14 int os::numa_get_group_id() { 1.15 @@ -2751,11 +2752,33 @@ 1.16 return 0; 1.17 } 1.18 1.19 +int os::Linux::get_existing_num_nodes() { 1.20 + size_t node; 1.21 + size_t highest_node_number = Linux::numa_max_node(); 1.22 + int num_nodes = 0; 1.23 + 1.24 + // Get the total number of nodes in the system including nodes without memory. 1.25 + for (node = 0; node <= highest_node_number; node++) { 1.26 + if (isnode_in_existing_nodes(node)) { 1.27 + num_nodes++; 1.28 + } 1.29 + } 1.30 + return num_nodes; 1.31 +} 1.32 + 1.33 size_t os::numa_get_leaf_groups(int *ids, size_t size) { 1.34 - for (size_t i = 0; i < size; i++) { 1.35 - ids[i] = i; 1.36 - } 1.37 - return size; 1.38 + size_t highest_node_number = Linux::numa_max_node(); 1.39 + size_t i = 0; 1.40 + 1.41 + // Map all node ids in which is possible to allocate memory. Also nodes are 1.42 + // not always consecutively available, i.e. available from 0 to the highest 1.43 + // node number. 1.44 + for (size_t node = 0; node <= highest_node_number; node++) { 1.45 + if (Linux::isnode_in_configured_nodes(node)) { 1.46 + ids[i++] = node; 1.47 + } 1.48 + } 1.49 + return i; 1.50 } 1.51 1.52 bool os::get_page_info(char *start, page_info* info) { 1.53 @@ -2825,18 +2848,28 @@ 1.54 libnuma_dlsym(handle, "numa_node_to_cpus"))); 1.55 set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t, 1.56 libnuma_dlsym(handle, "numa_max_node"))); 1.57 + set_numa_num_configured_nodes(CAST_TO_FN_PTR(numa_num_configured_nodes_func_t, 1.58 + libnuma_dlsym(handle, "numa_num_configured_nodes"))); 1.59 set_numa_available(CAST_TO_FN_PTR(numa_available_func_t, 1.60 libnuma_dlsym(handle, "numa_available"))); 1.61 set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t, 1.62 libnuma_dlsym(handle, "numa_tonode_memory"))); 1.63 set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t, 1.64 - libnuma_dlsym(handle, "numa_interleave_memory"))); 1.65 + libnuma_dlsym(handle, "numa_interleave_memory"))); 1.66 set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t, 1.67 - libnuma_dlsym(handle, "numa_set_bind_policy"))); 1.68 - 1.69 + libnuma_dlsym(handle, "numa_set_bind_policy"))); 1.70 + set_numa_bitmask_isbitset(CAST_TO_FN_PTR(numa_bitmask_isbitset_func_t, 1.71 + libnuma_dlsym(handle, "numa_bitmask_isbitset"))); 1.72 + set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t, 1.73 + libnuma_dlsym(handle, "numa_distance"))); 1.74 1.75 if (numa_available() != -1) { 1.76 set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes")); 1.77 + set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr")); 1.78 + set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr")); 1.79 + // Create an index -> node mapping, since nodes are not always consecutive 1.80 + _nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true); 1.81 + rebuild_nindex_to_node_map(); 1.82 // Create a cpu -> node mapping 1.83 _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true); 1.84 rebuild_cpu_to_node_map(); 1.85 @@ -2847,6 +2880,17 @@ 1.86 return false; 1.87 } 1.88 1.89 +void os::Linux::rebuild_nindex_to_node_map() { 1.90 + int highest_node_number = Linux::numa_max_node(); 1.91 + 1.92 + nindex_to_node()->clear(); 1.93 + for (int node = 0; node <= highest_node_number; node++) { 1.94 + if (Linux::isnode_in_existing_nodes(node)) { 1.95 + nindex_to_node()->append(node); 1.96 + } 1.97 + } 1.98 +} 1.99 + 1.100 // rebuild_cpu_to_node_map() constructs a table mapping cpud id to node id. 1.101 // The table is later used in get_node_by_cpu(). 1.102 void os::Linux::rebuild_cpu_to_node_map() { 1.103 @@ -2866,16 +2910,46 @@ 1.104 1.105 cpu_to_node()->clear(); 1.106 cpu_to_node()->at_grow(cpu_num - 1); 1.107 - size_t node_num = numa_get_groups_num(); 1.108 - 1.109 + 1.110 + size_t node_num = get_existing_num_nodes(); 1.111 + 1.112 + int distance = 0; 1.113 + int closest_distance = INT_MAX; 1.114 + int closest_node = 0; 1.115 unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal); 1.116 for (size_t i = 0; i < node_num; i++) { 1.117 - if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) { 1.118 + // Check if node is configured (not a memory-less node). If it is not, find 1.119 + // the closest configured node. 1.120 + if (!isnode_in_configured_nodes(nindex_to_node()->at(i))) { 1.121 + closest_distance = INT_MAX; 1.122 + // Check distance from all remaining nodes in the system. Ignore distance 1.123 + // from itself and from another non-configured node. 1.124 + for (size_t m = 0; m < node_num; m++) { 1.125 + if (m != i && isnode_in_configured_nodes(nindex_to_node()->at(m))) { 1.126 + distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m)); 1.127 + // If a closest node is found, update. There is always at least one 1.128 + // configured node in the system so there is always at least one node 1.129 + // close. 1.130 + if (distance != 0 && distance < closest_distance) { 1.131 + closest_distance = distance; 1.132 + closest_node = nindex_to_node()->at(m); 1.133 + } 1.134 + } 1.135 + } 1.136 + } else { 1.137 + // Current node is already a configured node. 1.138 + closest_node = nindex_to_node()->at(i); 1.139 + } 1.140 + 1.141 + // Get cpus from the original node and map them to the closest node. If node 1.142 + // is a configured node (not a memory-less node), then original node and 1.143 + // closest node are the same. 1.144 + if (numa_node_to_cpus(nindex_to_node()->at(i), cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) { 1.145 for (size_t j = 0; j < cpu_map_valid_size; j++) { 1.146 if (cpu_map[j] != 0) { 1.147 for (size_t k = 0; k < BitsPerCLong; k++) { 1.148 if (cpu_map[j] & (1UL << k)) { 1.149 - cpu_to_node()->at_put(j * BitsPerCLong + k, i); 1.150 + cpu_to_node()->at_put(j * BitsPerCLong + k, closest_node); 1.151 } 1.152 } 1.153 } 1.154 @@ -2893,14 +2967,20 @@ 1.155 } 1.156 1.157 GrowableArray<int>* os::Linux::_cpu_to_node; 1.158 +GrowableArray<int>* os::Linux::_nindex_to_node; 1.159 os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu; 1.160 os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus; 1.161 os::Linux::numa_max_node_func_t os::Linux::_numa_max_node; 1.162 +os::Linux::numa_num_configured_nodes_func_t os::Linux::_numa_num_configured_nodes; 1.163 os::Linux::numa_available_func_t os::Linux::_numa_available; 1.164 os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory; 1.165 os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory; 1.166 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy; 1.167 +os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset; 1.168 +os::Linux::numa_distance_func_t os::Linux::_numa_distance; 1.169 unsigned long* os::Linux::_numa_all_nodes; 1.170 +struct bitmask* os::Linux::_numa_all_nodes_ptr; 1.171 +struct bitmask* os::Linux::_numa_nodes_ptr; 1.172 1.173 bool os::pd_uncommit_memory(char* addr, size_t size) { 1.174 uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,