1.1 --- a/src/os/linux/vm/os_linux.cpp Thu Apr 23 18:00:50 2015 +0200 1.2 +++ b/src/os/linux/vm/os_linux.cpp Mon Apr 25 11:36:14 2016 +0200 1.3 @@ -3047,393 +3047,6 @@ 1.4 return addr == MAP_FAILED ? NULL : addr; 1.5 } 1.6 1.7 -// Don't update _highest_vm_reserved_address, because there might be memory 1.8 -// regions above addr + size. If so, releasing a memory region only creates 1.9 -// a hole in the address space, it doesn't help prevent heap-stack collision. 1.10 -// 1.11 -static int anon_munmap(char * addr, size_t size) { 1.12 - return ::munmap(addr, size) == 0; 1.13 -} 1.14 - 1.15 -char* os::pd_reserve_memory(size_t bytes, char* requested_addr, 1.16 - size_t alignment_hint) { 1.17 - return anon_mmap(requested_addr, bytes, (requested_addr != NULL)); 1.18 -} 1.19 - 1.20 -bool os::pd_release_memory(char* addr, size_t size) { 1.21 - return anon_munmap(addr, size); 1.22 -} 1.23 - 1.24 -static address highest_vm_reserved_address() { 1.25 - return _highest_vm_reserved_address; 1.26 -} 1.27 - 1.28 -static bool linux_mprotect(char* addr, size_t size, int prot) { 1.29 - // Linux wants the mprotect address argument to be page aligned. 1.30 - char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size()); 1.31 - 1.32 - // According to SUSv3, mprotect() should only be used with mappings 1.33 - // established by mmap(), and mmap() always maps whole pages. Unaligned 1.34 - // 'addr' likely indicates problem in the VM (e.g. trying to change 1.35 - // protection of malloc'ed or statically allocated memory). Check the 1.36 - // caller if you hit this assert. 1.37 - assert(addr == bottom, "sanity check"); 1.38 - 1.39 - size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size()); 1.40 - return ::mprotect(bottom, size, prot) == 0; 1.41 -} 1.42 - 1.43 -// Set protections specified 1.44 -bool os::protect_memory(char* addr, size_t bytes, ProtType prot, 1.45 - bool is_committed) { 1.46 - unsigned int p = 0; 1.47 - switch (prot) { 1.48 - case MEM_PROT_NONE: p = PROT_NONE; break; 1.49 - case MEM_PROT_READ: p = PROT_READ; break; 1.50 - case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break; 1.51 - case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break; 1.52 - default: 1.53 - ShouldNotReachHere(); 1.54 - } 1.55 - // is_committed is unused. 1.56 - return linux_mprotect(addr, bytes, p); 1.57 -} 1.58 - 1.59 -bool os::guard_memory(char* addr, size_t size) { 1.60 - return linux_mprotect(addr, size, PROT_NONE); 1.61 -} 1.62 - 1.63 -bool os::unguard_memory(char* addr, size_t size) { 1.64 - return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); 1.65 -} 1.66 - 1.67 -bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) { 1.68 - bool result = false; 1.69 - void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, 1.70 - MAP_ANONYMOUS|MAP_PRIVATE, 1.71 - -1, 0); 1.72 - if (p != MAP_FAILED) { 1.73 - void *aligned_p = align_ptr_up(p, page_size); 1.74 - 1.75 - result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; 1.76 - 1.77 - munmap(p, page_size * 2); 1.78 - } 1.79 - 1.80 - if (warn && !result) { 1.81 - warning("TransparentHugePages is not supported by the operating system."); 1.82 - } 1.83 - 1.84 - return result; 1.85 -} 1.86 - 1.87 -bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { 1.88 - bool result = false; 1.89 - void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, 1.90 - MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, 1.91 - -1, 0); 1.92 - 1.93 - if (p != MAP_FAILED) { 1.94 - // We don't know if this really is a huge page or not. 1.95 - FILE *fp = fopen("/proc/self/maps", "r"); 1.96 - if (fp) { 1.97 - while (!feof(fp)) { 1.98 - char chars[257]; 1.99 - long x = 0; 1.100 - if (fgets(chars, sizeof(chars), fp)) { 1.101 - if (sscanf(chars, "%lx-%*x", &x) == 1 1.102 - && x == (long)p) { 1.103 - if (strstr (chars, "hugepage")) { 1.104 - result = true; 1.105 - break; 1.106 - } 1.107 - } 1.108 - } 1.109 - } 1.110 - fclose(fp); 1.111 - } 1.112 - munmap(p, page_size); 1.113 - } 1.114 - 1.115 - if (warn && !result) { 1.116 - warning("HugeTLBFS is not supported by the operating system."); 1.117 - } 1.118 - 1.119 - return result; 1.120 -} 1.121 - 1.122 -/* 1.123 -* Set the coredump_filter bits to include largepages in core dump (bit 6) 1.124 -* 1.125 -* From the coredump_filter documentation: 1.126 -* 1.127 -* - (bit 0) anonymous private memory 1.128 -* - (bit 1) anonymous shared memory 1.129 -* - (bit 2) file-backed private memory 1.130 -* - (bit 3) file-backed shared memory 1.131 -* - (bit 4) ELF header pages in file-backed private memory areas (it is 1.132 -* effective only if the bit 2 is cleared) 1.133 -* - (bit 5) hugetlb private memory 1.134 -* - (bit 6) hugetlb shared memory 1.135 -*/ 1.136 -static void set_coredump_filter(void) { 1.137 - FILE *f; 1.138 - long cdm; 1.139 - 1.140 - if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) { 1.141 - return; 1.142 - } 1.143 - 1.144 - if (fscanf(f, "%lx", &cdm) != 1) { 1.145 - fclose(f); 1.146 - return; 1.147 - } 1.148 - 1.149 - rewind(f); 1.150 - 1.151 - if ((cdm & LARGEPAGES_BIT) == 0) { 1.152 - cdm |= LARGEPAGES_BIT; 1.153 - fprintf(f, "%#lx", cdm); 1.154 - } 1.155 - 1.156 - fclose(f); 1.157 -} 1.158 - 1.159 -// Large page support 1.160 - 1.161 -static size_t _large_page_size = 0; 1.162 - 1.163 -size_t os::Linux::find_large_page_size() { 1.164 - size_t large_page_size = 0; 1.165 - 1.166 - // large_page_size on Linux is used to round up heap size. x86 uses either 1.167 - // 2M or 4M page, depending on whether PAE (Physical Address Extensions) 1.168 - // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use 1.169 - // page as large as 256M. 1.170 - // 1.171 - // Here we try to figure out page size by parsing /proc/meminfo and looking 1.172 - // for a line with the following format: 1.173 - // Hugepagesize: 2048 kB 1.174 - // 1.175 - // If we can't determine the value (e.g. /proc is not mounted, or the text 1.176 - // format has been changed), we'll use the largest page size supported by 1.177 - // the processor. 1.178 - 1.179 -#ifndef ZERO 1.180 - large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) 1.181 - ARM_ONLY(2 * M) PPC_ONLY(4 * M); 1.182 -#endif // ZERO 1.183 - 1.184 - FILE *fp = fopen("/proc/meminfo", "r"); 1.185 - if (fp) { 1.186 - while (!feof(fp)) { 1.187 - int x = 0; 1.188 - char buf[16]; 1.189 - if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { 1.190 - if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { 1.191 - large_page_size = x * K; 1.192 - break; 1.193 - } 1.194 - } else { 1.195 - // skip to next line 1.196 - for (;;) { 1.197 - int ch = fgetc(fp); 1.198 - if (ch == EOF || ch == (int)'\n') break; 1.199 - } 1.200 - } 1.201 - } 1.202 - fclose(fp); 1.203 - } 1.204 - 1.205 - if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { 1.206 - warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " 1.207 - SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), 1.208 - proper_unit_for_byte_size(large_page_size)); 1.209 - } 1.210 - 1.211 - return large_page_size; 1.212 -} 1.213 - 1.214 -size_t os::Linux::setup_large_page_size() { 1.215 - _large_page_size = Linux::find_large_page_size(); 1.216 - const size_t default_page_size = (size_t)Linux::page_size(); 1.217 - if (_large_page_size > default_page_size) { 1.218 - _page_sizes[0] = _large_page_size; 1.219 - _page_sizes[1] = default_page_size; 1.220 - _page_sizes[2] = 0; 1.221 - } 1.222 - 1.223 - return _large_page_size; 1.224 -} 1.225 - 1.226 -bool os::Linux::setup_large_page_type(size_t page_size) { 1.227 - if (FLAG_IS_DEFAULT(UseHugeTLBFS) && 1.228 - FLAG_IS_DEFAULT(UseSHM) && 1.229 - FLAG_IS_DEFAULT(UseTransparentHugePages)) { 1.230 - 1.231 - // The type of large pages has not been specified by the user. 1.232 - 1.233 - // Try UseHugeTLBFS and then UseSHM. 1.234 - UseHugeTLBFS = UseSHM = true; 1.235 - 1.236 - // Don't try UseTransparentHugePages since there are known 1.237 - // performance issues with it turned on. This might change in the future. 1.238 - UseTransparentHugePages = false; 1.239 - } 1.240 - 1.241 - if (UseTransparentHugePages) { 1.242 - bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); 1.243 - if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { 1.244 - UseHugeTLBFS = false; 1.245 - UseSHM = false; 1.246 - return true; 1.247 - } 1.248 - UseTransparentHugePages = false; 1.249 - } 1.250 - 1.251 - if (UseHugeTLBFS) { 1.252 - bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); 1.253 - if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { 1.254 - UseSHM = false; 1.255 - return true; 1.256 - } 1.257 - UseHugeTLBFS = false; 1.258 - } 1.259 - 1.260 - return UseSHM; 1.261 -} 1.262 - 1.263 -void os::large_page_init() { 1.264 - if (!UseLargePages && 1.265 - !UseTransparentHugePages && 1.266 - !UseHugeTLBFS && 1.267 - !UseSHM) { 1.268 - // Not using large pages. 1.269 - return; 1.270 - } 1.271 - 1.272 - if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) { 1.273 - // The user explicitly turned off large pages. 1.274 - // Ignore the rest of the large pages flags. 1.275 - UseTransparentHugePages = false; 1.276 - UseHugeTLBFS = false; 1.277 - UseSHM = false; 1.278 - return; 1.279 - } 1.280 - 1.281 - size_t large_page_size = Linux::setup_large_page_size(); 1.282 - UseLargePages = Linux::setup_large_page_type(large_page_size); 1.283 - 1.284 - set_coredump_filter(); 1.285 -} 1.286 - 1.287 -#ifndef SHM_HUGETLB 1.288 -#define SHM_HUGETLB 04000 1.289 -#endif 1.290 - 1.291 -char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) { 1.292 - // "exec" is passed in but not used. Creating the shared image for 1.293 - // the code cache doesn't have an SHM_X executable permission to check. 1.294 - assert(UseLargePages && UseSHM, "only for SHM large pages"); 1.295 - assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); 1.296 - 1.297 - if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) { 1.298 - return NULL; // Fallback to small pages. 1.299 - } 1.300 - 1.301 - key_t key = IPC_PRIVATE; 1.302 - char *addr; 1.303 - 1.304 - bool warn_on_failure = UseLargePages && 1.305 - (!FLAG_IS_DEFAULT(UseLargePages) || 1.306 - !FLAG_IS_DEFAULT(UseSHM) || 1.307 - !FLAG_IS_DEFAULT(LargePageSizeInBytes) 1.308 - ); 1.309 - char msg[128]; 1.310 - 1.311 - // Create a large shared memory region to attach to based on size. 1.312 - // Currently, size is the total size of the heap 1.313 - int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W); 1.314 - if (shmid == -1) { 1.315 - // Possible reasons for shmget failure: 1.316 - // 1. shmmax is too small for Java heap. 1.317 - // > check shmmax value: cat /proc/sys/kernel/shmmax 1.318 - // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax 1.319 - // 2. not enough large page memory. 1.320 - // > check available large pages: cat /proc/meminfo 1.321 - // > increase amount of large pages: 1.322 - // echo new_value > /proc/sys/vm/nr_hugepages 1.323 - // Note 1: different Linux may use different name for this property, 1.324 - // e.g. on Redhat AS-3 it is "hugetlb_pool". 1.325 - // Note 2: it's possible there's enough physical memory available but 1.326 - // they are so fragmented after a long run that they can't 1.327 - // coalesce into large pages. Try to reserve large pages when 1.328 - // the system is still "fresh". 1.329 - if (warn_on_failure) { 1.330 - jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno); 1.331 - warning("%s", msg); 1.332 - } 1.333 - return NULL; 1.334 - } 1.335 - 1.336 - // attach to the region 1.337 - addr = (char*)shmat(shmid, req_addr, 0); 1.338 - int err = errno; 1.339 - 1.340 - // Remove shmid. If shmat() is successful, the actual shared memory segment 1.341 - // will be deleted when it's detached by shmdt() or when the process 1.342 - // terminates. If shmat() is not successful this will remove the shared 1.343 - // segment immediately. 1.344 - shmctl(shmid, IPC_RMID, NULL); 1.345 - 1.346 - if ((intptr_t)addr == -1) { 1.347 - if (warn_on_failure) { 1.348 - jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err); 1.349 - warning("%s", msg); 1.350 - } 1.351 - return NULL; 1.352 - } 1.353 - 1.354 - return addr; 1.355 -} 1.356 - 1.357 -static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) { 1.358 - assert(error == ENOMEM, "Only expect to fail if no memory is available"); 1.359 - 1.360 - bool warn_on_failure = UseLargePages && 1.361 - (!FLAG_IS_DEFAULT(UseLargePages) || 1.362 - !FLAG_IS_DEFAULT(UseHugeTLBFS) || 1.363 - !FLAG_IS_DEFAULT(LargePageSizeInBytes)); 1.364 - 1.365 - if (warn_on_failure) { 1.366 - char msg[128]; 1.367 - jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " 1.368 - PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); 1.369 - warning("%s", msg); 1.370 - } 1.371 -} 1.372 - 1.373 -char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) { 1.374 - assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); 1.375 - assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); 1.376 - assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); 1.377 - 1.378 - int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; 1.379 - char* addr = (char*)::mmap(req_addr, bytes, prot, 1.380 - MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, 1.381 - -1, 0); 1.382 - 1.383 - if (addr == MAP_FAILED) { 1.384 - warn_on_large_pages_failure(req_addr, bytes, errno); 1.385 - return NULL; 1.386 - } 1.387 - 1.388 - assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); 1.389 - 1.390 - return addr; 1.391 -} 1.392 - 1.393 -// Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed(). 1.394 // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address 1.395 // (req_addr != NULL) or with a given alignment. 1.396 // - bytes shall be a multiple of alignment. 1.397 @@ -3474,7 +3087,463 @@ 1.398 } 1.399 } 1.400 return start; 1.401 - 1.402 +} 1.403 + 1.404 +// Don't update _highest_vm_reserved_address, because there might be memory 1.405 +// regions above addr + size. If so, releasing a memory region only creates 1.406 +// a hole in the address space, it doesn't help prevent heap-stack collision. 1.407 +// 1.408 +static int anon_munmap(char * addr, size_t size) { 1.409 + return ::munmap(addr, size) == 0; 1.410 +} 1.411 + 1.412 +char* os::pd_reserve_memory(size_t bytes, char* requested_addr, 1.413 + size_t alignment_hint) { 1.414 + return anon_mmap(requested_addr, bytes, (requested_addr != NULL)); 1.415 +} 1.416 + 1.417 +bool os::pd_release_memory(char* addr, size_t size) { 1.418 + return anon_munmap(addr, size); 1.419 +} 1.420 + 1.421 +static address highest_vm_reserved_address() { 1.422 + return _highest_vm_reserved_address; 1.423 +} 1.424 + 1.425 +static bool linux_mprotect(char* addr, size_t size, int prot) { 1.426 + // Linux wants the mprotect address argument to be page aligned. 1.427 + char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size()); 1.428 + 1.429 + // According to SUSv3, mprotect() should only be used with mappings 1.430 + // established by mmap(), and mmap() always maps whole pages. Unaligned 1.431 + // 'addr' likely indicates problem in the VM (e.g. trying to change 1.432 + // protection of malloc'ed or statically allocated memory). Check the 1.433 + // caller if you hit this assert. 1.434 + assert(addr == bottom, "sanity check"); 1.435 + 1.436 + size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size()); 1.437 + return ::mprotect(bottom, size, prot) == 0; 1.438 +} 1.439 + 1.440 +// Set protections specified 1.441 +bool os::protect_memory(char* addr, size_t bytes, ProtType prot, 1.442 + bool is_committed) { 1.443 + unsigned int p = 0; 1.444 + switch (prot) { 1.445 + case MEM_PROT_NONE: p = PROT_NONE; break; 1.446 + case MEM_PROT_READ: p = PROT_READ; break; 1.447 + case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break; 1.448 + case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break; 1.449 + default: 1.450 + ShouldNotReachHere(); 1.451 + } 1.452 + // is_committed is unused. 1.453 + return linux_mprotect(addr, bytes, p); 1.454 +} 1.455 + 1.456 +bool os::guard_memory(char* addr, size_t size) { 1.457 + return linux_mprotect(addr, size, PROT_NONE); 1.458 +} 1.459 + 1.460 +bool os::unguard_memory(char* addr, size_t size) { 1.461 + return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); 1.462 +} 1.463 + 1.464 +bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) { 1.465 + bool result = false; 1.466 + void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, 1.467 + MAP_ANONYMOUS|MAP_PRIVATE, 1.468 + -1, 0); 1.469 + if (p != MAP_FAILED) { 1.470 + void *aligned_p = align_ptr_up(p, page_size); 1.471 + 1.472 + result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; 1.473 + 1.474 + munmap(p, page_size * 2); 1.475 + } 1.476 + 1.477 + if (warn && !result) { 1.478 + warning("TransparentHugePages is not supported by the operating system."); 1.479 + } 1.480 + 1.481 + return result; 1.482 +} 1.483 + 1.484 +bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { 1.485 + bool result = false; 1.486 + void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, 1.487 + MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, 1.488 + -1, 0); 1.489 + 1.490 + if (p != MAP_FAILED) { 1.491 + // We don't know if this really is a huge page or not. 1.492 + FILE *fp = fopen("/proc/self/maps", "r"); 1.493 + if (fp) { 1.494 + while (!feof(fp)) { 1.495 + char chars[257]; 1.496 + long x = 0; 1.497 + if (fgets(chars, sizeof(chars), fp)) { 1.498 + if (sscanf(chars, "%lx-%*x", &x) == 1 1.499 + && x == (long)p) { 1.500 + if (strstr (chars, "hugepage")) { 1.501 + result = true; 1.502 + break; 1.503 + } 1.504 + } 1.505 + } 1.506 + } 1.507 + fclose(fp); 1.508 + } 1.509 + munmap(p, page_size); 1.510 + } 1.511 + 1.512 + if (warn && !result) { 1.513 + warning("HugeTLBFS is not supported by the operating system."); 1.514 + } 1.515 + 1.516 + return result; 1.517 +} 1.518 + 1.519 +/* 1.520 +* Set the coredump_filter bits to include largepages in core dump (bit 6) 1.521 +* 1.522 +* From the coredump_filter documentation: 1.523 +* 1.524 +* - (bit 0) anonymous private memory 1.525 +* - (bit 1) anonymous shared memory 1.526 +* - (bit 2) file-backed private memory 1.527 +* - (bit 3) file-backed shared memory 1.528 +* - (bit 4) ELF header pages in file-backed private memory areas (it is 1.529 +* effective only if the bit 2 is cleared) 1.530 +* - (bit 5) hugetlb private memory 1.531 +* - (bit 6) hugetlb shared memory 1.532 +*/ 1.533 +static void set_coredump_filter(void) { 1.534 + FILE *f; 1.535 + long cdm; 1.536 + 1.537 + if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) { 1.538 + return; 1.539 + } 1.540 + 1.541 + if (fscanf(f, "%lx", &cdm) != 1) { 1.542 + fclose(f); 1.543 + return; 1.544 + } 1.545 + 1.546 + rewind(f); 1.547 + 1.548 + if ((cdm & LARGEPAGES_BIT) == 0) { 1.549 + cdm |= LARGEPAGES_BIT; 1.550 + fprintf(f, "%#lx", cdm); 1.551 + } 1.552 + 1.553 + fclose(f); 1.554 +} 1.555 + 1.556 +// Large page support 1.557 + 1.558 +static size_t _large_page_size = 0; 1.559 + 1.560 +size_t os::Linux::find_large_page_size() { 1.561 + size_t large_page_size = 0; 1.562 + 1.563 + // large_page_size on Linux is used to round up heap size. x86 uses either 1.564 + // 2M or 4M page, depending on whether PAE (Physical Address Extensions) 1.565 + // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use 1.566 + // page as large as 256M. 1.567 + // 1.568 + // Here we try to figure out page size by parsing /proc/meminfo and looking 1.569 + // for a line with the following format: 1.570 + // Hugepagesize: 2048 kB 1.571 + // 1.572 + // If we can't determine the value (e.g. /proc is not mounted, or the text 1.573 + // format has been changed), we'll use the largest page size supported by 1.574 + // the processor. 1.575 + 1.576 +#ifndef ZERO 1.577 + large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) 1.578 + ARM_ONLY(2 * M) PPC_ONLY(4 * M); 1.579 +#endif // ZERO 1.580 + 1.581 + FILE *fp = fopen("/proc/meminfo", "r"); 1.582 + if (fp) { 1.583 + while (!feof(fp)) { 1.584 + int x = 0; 1.585 + char buf[16]; 1.586 + if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { 1.587 + if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { 1.588 + large_page_size = x * K; 1.589 + break; 1.590 + } 1.591 + } else { 1.592 + // skip to next line 1.593 + for (;;) { 1.594 + int ch = fgetc(fp); 1.595 + if (ch == EOF || ch == (int)'\n') break; 1.596 + } 1.597 + } 1.598 + } 1.599 + fclose(fp); 1.600 + } 1.601 + 1.602 + if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { 1.603 + warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " 1.604 + SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), 1.605 + proper_unit_for_byte_size(large_page_size)); 1.606 + } 1.607 + 1.608 + return large_page_size; 1.609 +} 1.610 + 1.611 +size_t os::Linux::setup_large_page_size() { 1.612 + _large_page_size = Linux::find_large_page_size(); 1.613 + const size_t default_page_size = (size_t)Linux::page_size(); 1.614 + if (_large_page_size > default_page_size) { 1.615 + _page_sizes[0] = _large_page_size; 1.616 + _page_sizes[1] = default_page_size; 1.617 + _page_sizes[2] = 0; 1.618 + } 1.619 + 1.620 + return _large_page_size; 1.621 +} 1.622 + 1.623 +bool os::Linux::setup_large_page_type(size_t page_size) { 1.624 + if (FLAG_IS_DEFAULT(UseHugeTLBFS) && 1.625 + FLAG_IS_DEFAULT(UseSHM) && 1.626 + FLAG_IS_DEFAULT(UseTransparentHugePages)) { 1.627 + 1.628 + // The type of large pages has not been specified by the user. 1.629 + 1.630 + // Try UseHugeTLBFS and then UseSHM. 1.631 + UseHugeTLBFS = UseSHM = true; 1.632 + 1.633 + // Don't try UseTransparentHugePages since there are known 1.634 + // performance issues with it turned on. This might change in the future. 1.635 + UseTransparentHugePages = false; 1.636 + } 1.637 + 1.638 + if (UseTransparentHugePages) { 1.639 + bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); 1.640 + if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { 1.641 + UseHugeTLBFS = false; 1.642 + UseSHM = false; 1.643 + return true; 1.644 + } 1.645 + UseTransparentHugePages = false; 1.646 + } 1.647 + 1.648 + if (UseHugeTLBFS) { 1.649 + bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); 1.650 + if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { 1.651 + UseSHM = false; 1.652 + return true; 1.653 + } 1.654 + UseHugeTLBFS = false; 1.655 + } 1.656 + 1.657 + return UseSHM; 1.658 +} 1.659 + 1.660 +void os::large_page_init() { 1.661 + if (!UseLargePages && 1.662 + !UseTransparentHugePages && 1.663 + !UseHugeTLBFS && 1.664 + !UseSHM) { 1.665 + // Not using large pages. 1.666 + return; 1.667 + } 1.668 + 1.669 + if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) { 1.670 + // The user explicitly turned off large pages. 1.671 + // Ignore the rest of the large pages flags. 1.672 + UseTransparentHugePages = false; 1.673 + UseHugeTLBFS = false; 1.674 + UseSHM = false; 1.675 + return; 1.676 + } 1.677 + 1.678 + size_t large_page_size = Linux::setup_large_page_size(); 1.679 + UseLargePages = Linux::setup_large_page_type(large_page_size); 1.680 + 1.681 + set_coredump_filter(); 1.682 +} 1.683 + 1.684 +#ifndef SHM_HUGETLB 1.685 +#define SHM_HUGETLB 04000 1.686 +#endif 1.687 + 1.688 +#define shm_warning_format(format, ...) \ 1.689 + do { \ 1.690 + if (UseLargePages && \ 1.691 + (!FLAG_IS_DEFAULT(UseLargePages) || \ 1.692 + !FLAG_IS_DEFAULT(UseSHM) || \ 1.693 + !FLAG_IS_DEFAULT(LargePageSizeInBytes))) { \ 1.694 + warning(format, __VA_ARGS__); \ 1.695 + } \ 1.696 + } while (0) 1.697 + 1.698 +#define shm_warning(str) shm_warning_format("%s", str) 1.699 + 1.700 +#define shm_warning_with_errno(str) \ 1.701 + do { \ 1.702 + int err = errno; \ 1.703 + shm_warning_format(str " (error = %d)", err); \ 1.704 + } while (0) 1.705 + 1.706 +static char* shmat_with_alignment(int shmid, size_t bytes, size_t alignment) { 1.707 + assert(is_size_aligned(bytes, alignment), "Must be divisible by the alignment"); 1.708 + 1.709 + if (!is_size_aligned(alignment, SHMLBA)) { 1.710 + assert(false, "Code below assumes that alignment is at least SHMLBA aligned"); 1.711 + return NULL; 1.712 + } 1.713 + 1.714 + // To ensure that we get 'alignment' aligned memory from shmat, 1.715 + // we pre-reserve aligned virtual memory and then attach to that. 1.716 + 1.717 + char* pre_reserved_addr = anon_mmap_aligned(bytes, alignment, NULL); 1.718 + if (pre_reserved_addr == NULL) { 1.719 + // Couldn't pre-reserve aligned memory. 1.720 + shm_warning("Failed to pre-reserve aligned memory for shmat."); 1.721 + return NULL; 1.722 + } 1.723 + 1.724 + // SHM_REMAP is needed to allow shmat to map over an existing mapping. 1.725 + char* addr = (char*)shmat(shmid, pre_reserved_addr, SHM_REMAP); 1.726 + 1.727 + if ((intptr_t)addr == -1) { 1.728 + int err = errno; 1.729 + shm_warning_with_errno("Failed to attach shared memory."); 1.730 + 1.731 + assert(err != EACCES, "Unexpected error"); 1.732 + assert(err != EIDRM, "Unexpected error"); 1.733 + assert(err != EINVAL, "Unexpected error"); 1.734 + 1.735 + // Since we don't know if the kernel unmapped the pre-reserved memory area 1.736 + // we can't unmap it, since that would potentially unmap memory that was 1.737 + // mapped from other threads. 1.738 + return NULL; 1.739 + } 1.740 + 1.741 + return addr; 1.742 +} 1.743 + 1.744 +static char* shmat_at_address(int shmid, char* req_addr) { 1.745 + if (!is_ptr_aligned(req_addr, SHMLBA)) { 1.746 + assert(false, "Requested address needs to be SHMLBA aligned"); 1.747 + return NULL; 1.748 + } 1.749 + 1.750 + char* addr = (char*)shmat(shmid, req_addr, 0); 1.751 + 1.752 + if ((intptr_t)addr == -1) { 1.753 + shm_warning_with_errno("Failed to attach shared memory."); 1.754 + return NULL; 1.755 + } 1.756 + 1.757 + return addr; 1.758 +} 1.759 + 1.760 +static char* shmat_large_pages(int shmid, size_t bytes, size_t alignment, char* req_addr) { 1.761 + // If a req_addr has been provided, we assume that the caller has already aligned the address. 1.762 + if (req_addr != NULL) { 1.763 + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Must be divisible by the large page size"); 1.764 + assert(is_ptr_aligned(req_addr, alignment), "Must be divisible by given alignment"); 1.765 + return shmat_at_address(shmid, req_addr); 1.766 + } 1.767 + 1.768 + // Since shmid has been setup with SHM_HUGETLB, shmat will automatically 1.769 + // return large page size aligned memory addresses when req_addr == NULL. 1.770 + // However, if the alignment is larger than the large page size, we have 1.771 + // to manually ensure that the memory returned is 'alignment' aligned. 1.772 + if (alignment > os::large_page_size()) { 1.773 + assert(is_size_aligned(alignment, os::large_page_size()), "Must be divisible by the large page size"); 1.774 + return shmat_with_alignment(shmid, bytes, alignment); 1.775 + } else { 1.776 + return shmat_at_address(shmid, NULL); 1.777 + } 1.778 +} 1.779 + 1.780 +char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) { 1.781 + // "exec" is passed in but not used. Creating the shared image for 1.782 + // the code cache doesn't have an SHM_X executable permission to check. 1.783 + assert(UseLargePages && UseSHM, "only for SHM large pages"); 1.784 + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); 1.785 + assert(is_ptr_aligned(req_addr, alignment), "Unaligned address"); 1.786 + 1.787 + if (!is_size_aligned(bytes, os::large_page_size())) { 1.788 + return NULL; // Fallback to small pages. 1.789 + } 1.790 + 1.791 + // Create a large shared memory region to attach to based on size. 1.792 + // Currently, size is the total size of the heap. 1.793 + int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W); 1.794 + if (shmid == -1) { 1.795 + // Possible reasons for shmget failure: 1.796 + // 1. shmmax is too small for Java heap. 1.797 + // > check shmmax value: cat /proc/sys/kernel/shmmax 1.798 + // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax 1.799 + // 2. not enough large page memory. 1.800 + // > check available large pages: cat /proc/meminfo 1.801 + // > increase amount of large pages: 1.802 + // echo new_value > /proc/sys/vm/nr_hugepages 1.803 + // Note 1: different Linux may use different name for this property, 1.804 + // e.g. on Redhat AS-3 it is "hugetlb_pool". 1.805 + // Note 2: it's possible there's enough physical memory available but 1.806 + // they are so fragmented after a long run that they can't 1.807 + // coalesce into large pages. Try to reserve large pages when 1.808 + // the system is still "fresh". 1.809 + shm_warning_with_errno("Failed to reserve shared memory."); 1.810 + return NULL; 1.811 + } 1.812 + 1.813 + // Attach to the region. 1.814 + char* addr = shmat_large_pages(shmid, bytes, alignment, req_addr); 1.815 + 1.816 + // Remove shmid. If shmat() is successful, the actual shared memory segment 1.817 + // will be deleted when it's detached by shmdt() or when the process 1.818 + // terminates. If shmat() is not successful this will remove the shared 1.819 + // segment immediately. 1.820 + shmctl(shmid, IPC_RMID, NULL); 1.821 + 1.822 + return addr; 1.823 +} 1.824 + 1.825 +static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) { 1.826 + assert(error == ENOMEM, "Only expect to fail if no memory is available"); 1.827 + 1.828 + bool warn_on_failure = UseLargePages && 1.829 + (!FLAG_IS_DEFAULT(UseLargePages) || 1.830 + !FLAG_IS_DEFAULT(UseHugeTLBFS) || 1.831 + !FLAG_IS_DEFAULT(LargePageSizeInBytes)); 1.832 + 1.833 + if (warn_on_failure) { 1.834 + char msg[128]; 1.835 + jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " 1.836 + PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); 1.837 + warning("%s", msg); 1.838 + } 1.839 +} 1.840 + 1.841 +char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) { 1.842 + assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); 1.843 + assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); 1.844 + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); 1.845 + 1.846 + int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; 1.847 + char* addr = (char*)::mmap(req_addr, bytes, prot, 1.848 + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, 1.849 + -1, 0); 1.850 + 1.851 + if (addr == MAP_FAILED) { 1.852 + warn_on_large_pages_failure(req_addr, bytes, errno); 1.853 + return NULL; 1.854 + } 1.855 + 1.856 + assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); 1.857 + 1.858 + return addr; 1.859 } 1.860 1.861 // Reserve memory using mmap(MAP_HUGETLB).