src/os/linux/vm/os_linux.cpp

changeset 8434:776cb7cbe2e4
parent    8433:44c8fe602a5e
child     8604:04d83ba48607
child     8619:3a38e441474d
     1.1 --- a/src/os/linux/vm/os_linux.cpp	Thu Apr 23 18:00:50 2015 +0200
     1.2 +++ b/src/os/linux/vm/os_linux.cpp	Mon Apr 25 11:36:14 2016 +0200
     1.3 @@ -3047,393 +3047,6 @@
     1.4    return addr == MAP_FAILED ? NULL : addr;
     1.5  }
     1.6  
     1.7 -// Don't update _highest_vm_reserved_address, because there might be memory
     1.8 -// regions above addr + size. If so, releasing a memory region only creates
     1.9 -// a hole in the address space, it doesn't help prevent heap-stack collision.
    1.10 -//
    1.11 -static int anon_munmap(char * addr, size_t size) {
    1.12 -  return ::munmap(addr, size) == 0;
    1.13 -}
    1.14 -
    1.15 -char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
    1.16 -                         size_t alignment_hint) {
    1.17 -  return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
    1.18 -}
    1.19 -
    1.20 -bool os::pd_release_memory(char* addr, size_t size) {
    1.21 -  return anon_munmap(addr, size);
    1.22 -}
    1.23 -
    1.24 -static address highest_vm_reserved_address() {
    1.25 -  return _highest_vm_reserved_address;
    1.26 -}
    1.27 -
    1.28 -static bool linux_mprotect(char* addr, size_t size, int prot) {
    1.29 -  // Linux wants the mprotect address argument to be page aligned.
    1.30 -  char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
    1.31 -
    1.32 -  // According to SUSv3, mprotect() should only be used with mappings
    1.33 -  // established by mmap(), and mmap() always maps whole pages. Unaligned
     1.34 -  // 'addr' likely indicates a problem in the VM (e.g. trying to change
    1.35 -  // protection of malloc'ed or statically allocated memory). Check the
    1.36 -  // caller if you hit this assert.
    1.37 -  assert(addr == bottom, "sanity check");
    1.38 -
    1.39 -  size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
    1.40 -  return ::mprotect(bottom, size, prot) == 0;
    1.41 -}
    1.42 -
    1.43 -// Set protections specified
    1.44 -bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
    1.45 -                        bool is_committed) {
    1.46 -  unsigned int p = 0;
    1.47 -  switch (prot) {
    1.48 -  case MEM_PROT_NONE: p = PROT_NONE; break;
    1.49 -  case MEM_PROT_READ: p = PROT_READ; break;
    1.50 -  case MEM_PROT_RW:   p = PROT_READ|PROT_WRITE; break;
    1.51 -  case MEM_PROT_RWX:  p = PROT_READ|PROT_WRITE|PROT_EXEC; break;
    1.52 -  default:
    1.53 -    ShouldNotReachHere();
    1.54 -  }
    1.55 -  // is_committed is unused.
    1.56 -  return linux_mprotect(addr, bytes, p);
    1.57 -}
    1.58 -
    1.59 -bool os::guard_memory(char* addr, size_t size) {
    1.60 -  return linux_mprotect(addr, size, PROT_NONE);
    1.61 -}
    1.62 -
    1.63 -bool os::unguard_memory(char* addr, size_t size) {
    1.64 -  return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
    1.65 -}
    1.66 -
    1.67 -bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) {
    1.68 -  bool result = false;
    1.69 -  void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
    1.70 -                 MAP_ANONYMOUS|MAP_PRIVATE,
    1.71 -                 -1, 0);
    1.72 -  if (p != MAP_FAILED) {
    1.73 -    void *aligned_p = align_ptr_up(p, page_size);
    1.74 -
    1.75 -    result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0;
    1.76 -
    1.77 -    munmap(p, page_size * 2);
    1.78 -  }
    1.79 -
    1.80 -  if (warn && !result) {
    1.81 -    warning("TransparentHugePages is not supported by the operating system.");
    1.82 -  }
    1.83 -
    1.84 -  return result;
    1.85 -}
    1.86 -
    1.87 -bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
    1.88 -  bool result = false;
    1.89 -  void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
    1.90 -                 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
    1.91 -                 -1, 0);
    1.92 -
    1.93 -  if (p != MAP_FAILED) {
    1.94 -    // We don't know if this really is a huge page or not.
    1.95 -    FILE *fp = fopen("/proc/self/maps", "r");
    1.96 -    if (fp) {
    1.97 -      while (!feof(fp)) {
    1.98 -        char chars[257];
    1.99 -        long x = 0;
   1.100 -        if (fgets(chars, sizeof(chars), fp)) {
   1.101 -          if (sscanf(chars, "%lx-%*x", &x) == 1
   1.102 -              && x == (long)p) {
   1.103 -            if (strstr (chars, "hugepage")) {
   1.104 -              result = true;
   1.105 -              break;
   1.106 -            }
   1.107 -          }
   1.108 -        }
   1.109 -      }
   1.110 -      fclose(fp);
   1.111 -    }
   1.112 -    munmap(p, page_size);
   1.113 -  }
   1.114 -
   1.115 -  if (warn && !result) {
   1.116 -    warning("HugeTLBFS is not supported by the operating system.");
   1.117 -  }
   1.118 -
   1.119 -  return result;
   1.120 -}
   1.121 -
   1.122 -/*
   1.123 -* Set the coredump_filter bits to include largepages in core dump (bit 6)
   1.124 -*
   1.125 -* From the coredump_filter documentation:
   1.126 -*
   1.127 -* - (bit 0) anonymous private memory
   1.128 -* - (bit 1) anonymous shared memory
   1.129 -* - (bit 2) file-backed private memory
   1.130 -* - (bit 3) file-backed shared memory
   1.131 -* - (bit 4) ELF header pages in file-backed private memory areas (it is
   1.132 -*           effective only if the bit 2 is cleared)
   1.133 -* - (bit 5) hugetlb private memory
   1.134 -* - (bit 6) hugetlb shared memory
   1.135 -*/
   1.136 -static void set_coredump_filter(void) {
   1.137 -  FILE *f;
   1.138 -  long cdm;
   1.139 -
   1.140 -  if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
   1.141 -    return;
   1.142 -  }
   1.143 -
   1.144 -  if (fscanf(f, "%lx", &cdm) != 1) {
   1.145 -    fclose(f);
   1.146 -    return;
   1.147 -  }
   1.148 -
   1.149 -  rewind(f);
   1.150 -
   1.151 -  if ((cdm & LARGEPAGES_BIT) == 0) {
   1.152 -    cdm |= LARGEPAGES_BIT;
   1.153 -    fprintf(f, "%#lx", cdm);
   1.154 -  }
   1.155 -
   1.156 -  fclose(f);
   1.157 -}
   1.158 -
   1.159 -// Large page support
   1.160 -
   1.161 -static size_t _large_page_size = 0;
   1.162 -
   1.163 -size_t os::Linux::find_large_page_size() {
   1.164 -  size_t large_page_size = 0;
   1.165 -
    1.166 -  // large_page_size on Linux is used to round up heap size. x86 uses either
    1.167 -  // 2M or 4M pages, depending on whether PAE (Physical Address Extensions)
    1.168 -  // mode is enabled. AMD64/EM64T uses 2M pages in 64-bit mode. IA64 can use
    1.169 -  // pages as large as 256M.
   1.170 -  //
   1.171 -  // Here we try to figure out page size by parsing /proc/meminfo and looking
   1.172 -  // for a line with the following format:
   1.173 -  //    Hugepagesize:     2048 kB
   1.174 -  //
   1.175 -  // If we can't determine the value (e.g. /proc is not mounted, or the text
   1.176 -  // format has been changed), we'll use the largest page size supported by
   1.177 -  // the processor.
   1.178 -
   1.179 -#ifndef ZERO
   1.180 -  large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
   1.181 -                     ARM_ONLY(2 * M) PPC_ONLY(4 * M);
   1.182 -#endif // ZERO
   1.183 -
   1.184 -  FILE *fp = fopen("/proc/meminfo", "r");
   1.185 -  if (fp) {
   1.186 -    while (!feof(fp)) {
   1.187 -      int x = 0;
   1.188 -      char buf[16];
   1.189 -      if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
   1.190 -        if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
   1.191 -          large_page_size = x * K;
   1.192 -          break;
   1.193 -        }
   1.194 -      } else {
   1.195 -        // skip to next line
   1.196 -        for (;;) {
   1.197 -          int ch = fgetc(fp);
   1.198 -          if (ch == EOF || ch == (int)'\n') break;
   1.199 -        }
   1.200 -      }
   1.201 -    }
   1.202 -    fclose(fp);
   1.203 -  }
   1.204 -
   1.205 -  if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) {
   1.206 -    warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is "
   1.207 -        SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size),
   1.208 -        proper_unit_for_byte_size(large_page_size));
   1.209 -  }
   1.210 -
   1.211 -  return large_page_size;
   1.212 -}
   1.213 -
   1.214 -size_t os::Linux::setup_large_page_size() {
   1.215 -  _large_page_size = Linux::find_large_page_size();
   1.216 -  const size_t default_page_size = (size_t)Linux::page_size();
   1.217 -  if (_large_page_size > default_page_size) {
   1.218 -    _page_sizes[0] = _large_page_size;
   1.219 -    _page_sizes[1] = default_page_size;
   1.220 -    _page_sizes[2] = 0;
   1.221 -  }
   1.222 -
   1.223 -  return _large_page_size;
   1.224 -}
   1.225 -
   1.226 -bool os::Linux::setup_large_page_type(size_t page_size) {
   1.227 -  if (FLAG_IS_DEFAULT(UseHugeTLBFS) &&
   1.228 -      FLAG_IS_DEFAULT(UseSHM) &&
   1.229 -      FLAG_IS_DEFAULT(UseTransparentHugePages)) {
   1.230 -
   1.231 -    // The type of large pages has not been specified by the user.
   1.232 -
   1.233 -    // Try UseHugeTLBFS and then UseSHM.
   1.234 -    UseHugeTLBFS = UseSHM = true;
   1.235 -
   1.236 -    // Don't try UseTransparentHugePages since there are known
   1.237 -    // performance issues with it turned on. This might change in the future.
   1.238 -    UseTransparentHugePages = false;
   1.239 -  }
   1.240 -
   1.241 -  if (UseTransparentHugePages) {
   1.242 -    bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages);
   1.243 -    if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) {
   1.244 -      UseHugeTLBFS = false;
   1.245 -      UseSHM = false;
   1.246 -      return true;
   1.247 -    }
   1.248 -    UseTransparentHugePages = false;
   1.249 -  }
   1.250 -
   1.251 -  if (UseHugeTLBFS) {
   1.252 -    bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
   1.253 -    if (hugetlbfs_sanity_check(warn_on_failure, page_size)) {
   1.254 -      UseSHM = false;
   1.255 -      return true;
   1.256 -    }
   1.257 -    UseHugeTLBFS = false;
   1.258 -  }
   1.259 -
   1.260 -  return UseSHM;
   1.261 -}
   1.262 -
   1.263 -void os::large_page_init() {
   1.264 -  if (!UseLargePages &&
   1.265 -      !UseTransparentHugePages &&
   1.266 -      !UseHugeTLBFS &&
   1.267 -      !UseSHM) {
   1.268 -    // Not using large pages.
   1.269 -    return;
   1.270 -  }
   1.271 -
   1.272 -  if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) {
   1.273 -    // The user explicitly turned off large pages.
   1.274 -    // Ignore the rest of the large pages flags.
   1.275 -    UseTransparentHugePages = false;
   1.276 -    UseHugeTLBFS = false;
   1.277 -    UseSHM = false;
   1.278 -    return;
   1.279 -  }
   1.280 -
   1.281 -  size_t large_page_size = Linux::setup_large_page_size();
   1.282 -  UseLargePages          = Linux::setup_large_page_type(large_page_size);
   1.283 -
   1.284 -  set_coredump_filter();
   1.285 -}
   1.286 -
   1.287 -#ifndef SHM_HUGETLB
   1.288 -#define SHM_HUGETLB 04000
   1.289 -#endif
   1.290 -
   1.291 -char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) {
   1.292 -  // "exec" is passed in but not used.  Creating the shared image for
   1.293 -  // the code cache doesn't have an SHM_X executable permission to check.
   1.294 -  assert(UseLargePages && UseSHM, "only for SHM large pages");
   1.295 -  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
   1.296 -
   1.297 -  if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) {
   1.298 -    return NULL; // Fallback to small pages.
   1.299 -  }
   1.300 -
   1.301 -  key_t key = IPC_PRIVATE;
   1.302 -  char *addr;
   1.303 -
   1.304 -  bool warn_on_failure = UseLargePages &&
   1.305 -                        (!FLAG_IS_DEFAULT(UseLargePages) ||
   1.306 -                         !FLAG_IS_DEFAULT(UseSHM) ||
   1.307 -                         !FLAG_IS_DEFAULT(LargePageSizeInBytes)
   1.308 -                        );
   1.309 -  char msg[128];
   1.310 -
   1.311 -  // Create a large shared memory region to attach to based on size.
   1.312 -  // Currently, size is the total size of the heap
   1.313 -  int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
   1.314 -  if (shmid == -1) {
   1.315 -     // Possible reasons for shmget failure:
   1.316 -     // 1. shmmax is too small for Java heap.
   1.317 -     //    > check shmmax value: cat /proc/sys/kernel/shmmax
   1.318 -     //    > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax
   1.319 -     // 2. not enough large page memory.
   1.320 -     //    > check available large pages: cat /proc/meminfo
   1.321 -     //    > increase amount of large pages:
   1.322 -     //          echo new_value > /proc/sys/vm/nr_hugepages
    1.323 -     //      Note 1: different Linux versions may use different names for
    1.324 -     //            this property, e.g. on Redhat AS-3 it is "hugetlb_pool".
    1.325 -     //      Note 2: it's possible there's enough physical memory available,
    1.326 -     //            but it is so fragmented after a long run that it can't
    1.327 -     //            coalesce into large pages. Try to reserve large pages when
    1.328 -     //            the system is still "fresh".
   1.329 -     if (warn_on_failure) {
   1.330 -       jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno);
   1.331 -       warning("%s", msg);
   1.332 -     }
   1.333 -     return NULL;
   1.334 -  }
   1.335 -
   1.336 -  // attach to the region
   1.337 -  addr = (char*)shmat(shmid, req_addr, 0);
   1.338 -  int err = errno;
   1.339 -
   1.340 -  // Remove shmid. If shmat() is successful, the actual shared memory segment
   1.341 -  // will be deleted when it's detached by shmdt() or when the process
   1.342 -  // terminates. If shmat() is not successful this will remove the shared
   1.343 -  // segment immediately.
   1.344 -  shmctl(shmid, IPC_RMID, NULL);
   1.345 -
   1.346 -  if ((intptr_t)addr == -1) {
   1.347 -     if (warn_on_failure) {
   1.348 -       jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err);
   1.349 -       warning("%s", msg);
   1.350 -     }
   1.351 -     return NULL;
   1.352 -  }
   1.353 -
   1.354 -  return addr;
   1.355 -}
   1.356 -
   1.357 -static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) {
   1.358 -  assert(error == ENOMEM, "Only expect to fail if no memory is available");
   1.359 -
   1.360 -  bool warn_on_failure = UseLargePages &&
   1.361 -      (!FLAG_IS_DEFAULT(UseLargePages) ||
   1.362 -       !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
   1.363 -       !FLAG_IS_DEFAULT(LargePageSizeInBytes));
   1.364 -
   1.365 -  if (warn_on_failure) {
   1.366 -    char msg[128];
   1.367 -    jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
   1.368 -        PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
   1.369 -    warning("%s", msg);
   1.370 -  }
   1.371 -}
   1.372 -
   1.373 -char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) {
   1.374 -  assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
   1.375 -  assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size");
   1.376 -  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
   1.377 -
   1.378 -  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
   1.379 -  char* addr = (char*)::mmap(req_addr, bytes, prot,
   1.380 -                             MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB,
   1.381 -                             -1, 0);
   1.382 -
   1.383 -  if (addr == MAP_FAILED) {
   1.384 -    warn_on_large_pages_failure(req_addr, bytes, errno);
   1.385 -    return NULL;
   1.386 -  }
   1.387 -
   1.388 -  assert(is_ptr_aligned(addr, os::large_page_size()), "Must be");
   1.389 -
   1.390 -  return addr;
   1.391 -}
   1.392 -
   1.393 -// Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed().
   1.394  // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address
   1.395  //   (req_addr != NULL) or with a given alignment.
   1.396  //  - bytes shall be a multiple of alignment.
   1.397 @@ -3474,7 +3087,463 @@
   1.398      }
   1.399    }
   1.400    return start;
   1.401 -
   1.402 +}
   1.403 +
   1.404 +// Don't update _highest_vm_reserved_address, because there might be memory
   1.405 +// regions above addr + size. If so, releasing a memory region only creates
   1.406 +// a hole in the address space, it doesn't help prevent heap-stack collision.
   1.407 +//
   1.408 +static int anon_munmap(char * addr, size_t size) {
   1.409 +  return ::munmap(addr, size) == 0;
   1.410 +}
   1.411 +
   1.412 +char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
   1.413 +                         size_t alignment_hint) {
   1.414 +  return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
   1.415 +}
   1.416 +
   1.417 +bool os::pd_release_memory(char* addr, size_t size) {
   1.418 +  return anon_munmap(addr, size);
   1.419 +}
   1.420 +
   1.421 +static address highest_vm_reserved_address() {
   1.422 +  return _highest_vm_reserved_address;
   1.423 +}
   1.424 +
   1.425 +static bool linux_mprotect(char* addr, size_t size, int prot) {
   1.426 +  // Linux wants the mprotect address argument to be page aligned.
   1.427 +  char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
   1.428 +
   1.429 +  // According to SUSv3, mprotect() should only be used with mappings
   1.430 +  // established by mmap(), and mmap() always maps whole pages. Unaligned
    1.431 +  // 'addr' likely indicates a problem in the VM (e.g. trying to change
   1.432 +  // protection of malloc'ed or statically allocated memory). Check the
   1.433 +  // caller if you hit this assert.
   1.434 +  assert(addr == bottom, "sanity check");
   1.435 +
   1.436 +  size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
   1.437 +  return ::mprotect(bottom, size, prot) == 0;
   1.438 +}
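
The alignment arithmetic in linux_mprotect() generalizes: round the start down to a page boundary, then round the length up so the widened range still covers the caller's [addr, addr + size). A minimal standalone sketch of the same computation, using raw bit masks instead of HotSpot's align_size_down()/align_size_up() helpers (the function name and structure here are illustrative, not part of the VM):

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstdint>

    // Sketch: mprotect a byte range by widening it to whole pages, mirroring
    // what linux_mprotect() does with align_size_down()/align_size_up().
    static bool mprotect_range(char* addr, size_t size, int prot) {
      const uintptr_t page = (uintptr_t)sysconf(_SC_PAGESIZE);
      char* bottom = (char*)((uintptr_t)addr & ~(page - 1));  // round start down
      size_t span  = ((size_t)(addr - bottom) + size + page - 1) & ~(page - 1);
      return ::mprotect(bottom, span, prot) == 0;
    }
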
   1.439 +
   1.440 +// Set protections specified
   1.441 +bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
   1.442 +                        bool is_committed) {
   1.443 +  unsigned int p = 0;
   1.444 +  switch (prot) {
   1.445 +  case MEM_PROT_NONE: p = PROT_NONE; break;
   1.446 +  case MEM_PROT_READ: p = PROT_READ; break;
   1.447 +  case MEM_PROT_RW:   p = PROT_READ|PROT_WRITE; break;
   1.448 +  case MEM_PROT_RWX:  p = PROT_READ|PROT_WRITE|PROT_EXEC; break;
   1.449 +  default:
   1.450 +    ShouldNotReachHere();
   1.451 +  }
   1.452 +  // is_committed is unused.
   1.453 +  return linux_mprotect(addr, bytes, p);
   1.454 +}
   1.455 +
   1.456 +bool os::guard_memory(char* addr, size_t size) {
   1.457 +  return linux_mprotect(addr, size, PROT_NONE);
   1.458 +}
   1.459 +
   1.460 +bool os::unguard_memory(char* addr, size_t size) {
   1.461 +  return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
   1.462 +}
   1.463 +
   1.464 +bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) {
   1.465 +  bool result = false;
   1.466 +  void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
   1.467 +                 MAP_ANONYMOUS|MAP_PRIVATE,
   1.468 +                 -1, 0);
   1.469 +  if (p != MAP_FAILED) {
   1.470 +    void *aligned_p = align_ptr_up(p, page_size);
   1.471 +
   1.472 +    result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0;
   1.473 +
   1.474 +    munmap(p, page_size * 2);
   1.475 +  }
   1.476 +
   1.477 +  if (warn && !result) {
   1.478 +    warning("TransparentHugePages is not supported by the operating system.");
   1.479 +  }
   1.480 +
   1.481 +  return result;
   1.482 +}
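
The probe above uses a common over-allocation trick: mapping page_size * 2 bytes guarantees that at least one page_size-aligned page lies inside the mapping, so madvise(MADV_HUGEPAGE) can be tested on an aligned address. A sketch of the same idea in isolation (probe_aligned is a hypothetical name, and align is assumed to be a power of two):

    #include <sys/mman.h>
    #include <cstdint>

    // Sketch: obtain an 'align'-aligned address by over-allocating, as the
    // sanity check above does. Caller must munmap(*raw_out, align * 2) later.
    static void* probe_aligned(size_t align, void** raw_out) {
      void* p = mmap(NULL, align * 2, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
      if (p == MAP_FAILED) return NULL;
      *raw_out = p;  // keep the original pointer for the eventual munmap()
      return (void*)(((uintptr_t)p + align - 1) & ~((uintptr_t)align - 1));
    }
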
   1.483 +
   1.484 +bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
   1.485 +  bool result = false;
   1.486 +  void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
   1.487 +                 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
   1.488 +                 -1, 0);
   1.489 +
   1.490 +  if (p != MAP_FAILED) {
   1.491 +    // We don't know if this really is a huge page or not.
   1.492 +    FILE *fp = fopen("/proc/self/maps", "r");
   1.493 +    if (fp) {
   1.494 +      while (!feof(fp)) {
   1.495 +        char chars[257];
   1.496 +        long x = 0;
   1.497 +        if (fgets(chars, sizeof(chars), fp)) {
   1.498 +          if (sscanf(chars, "%lx-%*x", &x) == 1
   1.499 +              && x == (long)p) {
   1.500 +            if (strstr (chars, "hugepage")) {
   1.501 +              result = true;
   1.502 +              break;
   1.503 +            }
   1.504 +          }
   1.505 +        }
   1.506 +      }
   1.507 +      fclose(fp);
   1.508 +    }
   1.509 +    munmap(p, page_size);
   1.510 +  }
   1.511 +
   1.512 +  if (warn && !result) {
   1.513 +    warning("HugeTLBFS is not supported by the operating system.");
   1.514 +  }
   1.515 +
   1.516 +  return result;
   1.517 +}
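
For reference, the loop above matches the mapping's start address at the front of a /proc/self/maps line and then searches that line for the substring "hugepage". On typical Linux kernels an anonymous MAP_HUGETLB mapping shows up along these lines (the exact fields vary by kernel version):

    2aaaaac00000-2aaaab000000 rw-p 00000000 00:0c 4087553    /anon_hugepage (deleted)
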
   1.518 +
   1.519 +/*
   1.520 +* Set the coredump_filter bits to include largepages in core dump (bit 6)
   1.521 +*
   1.522 +* From the coredump_filter documentation:
   1.523 +*
   1.524 +* - (bit 0) anonymous private memory
   1.525 +* - (bit 1) anonymous shared memory
   1.526 +* - (bit 2) file-backed private memory
   1.527 +* - (bit 3) file-backed shared memory
   1.528 +* - (bit 4) ELF header pages in file-backed private memory areas (it is
   1.529 +*           effective only if the bit 2 is cleared)
   1.530 +* - (bit 5) hugetlb private memory
   1.531 +* - (bit 6) hugetlb shared memory
   1.532 +*/
   1.533 +static void set_coredump_filter(void) {
   1.534 +  FILE *f;
   1.535 +  long cdm;
   1.536 +
   1.537 +  if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
   1.538 +    return;
   1.539 +  }
   1.540 +
   1.541 +  if (fscanf(f, "%lx", &cdm) != 1) {
   1.542 +    fclose(f);
   1.543 +    return;
   1.544 +  }
   1.545 +
   1.546 +  rewind(f);
   1.547 +
   1.548 +  if ((cdm & LARGEPAGES_BIT) == 0) {
   1.549 +    cdm |= LARGEPAGES_BIT;
   1.550 +    fprintf(f, "%#lx", cdm);
   1.551 +  }
   1.552 +
   1.553 +  fclose(f);
   1.554 +}
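
LARGEPAGES_BIT is defined earlier in this file; per the bit list above it should correspond to bit 6, i.e. presumably:

    // Assumed definition, matching "(bit 6) hugetlb shared memory" above.
    #define LARGEPAGES_BIT (1 << 6)   // 0x40
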
   1.555 +
   1.556 +// Large page support
   1.557 +
   1.558 +static size_t _large_page_size = 0;
   1.559 +
   1.560 +size_t os::Linux::find_large_page_size() {
   1.561 +  size_t large_page_size = 0;
   1.562 +
    1.563 +  // large_page_size on Linux is used to round up heap size. x86 uses either
    1.564 +  // 2M or 4M pages, depending on whether PAE (Physical Address Extensions)
    1.565 +  // mode is enabled. AMD64/EM64T uses 2M pages in 64-bit mode. IA64 can use
    1.566 +  // pages as large as 256M.
   1.567 +  //
   1.568 +  // Here we try to figure out page size by parsing /proc/meminfo and looking
   1.569 +  // for a line with the following format:
   1.570 +  //    Hugepagesize:     2048 kB
   1.571 +  //
   1.572 +  // If we can't determine the value (e.g. /proc is not mounted, or the text
   1.573 +  // format has been changed), we'll use the largest page size supported by
   1.574 +  // the processor.
   1.575 +
   1.576 +#ifndef ZERO
   1.577 +  large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M)
   1.578 +                     ARM_ONLY(2 * M) PPC_ONLY(4 * M);
   1.579 +#endif // ZERO
   1.580 +
   1.581 +  FILE *fp = fopen("/proc/meminfo", "r");
   1.582 +  if (fp) {
   1.583 +    while (!feof(fp)) {
   1.584 +      int x = 0;
   1.585 +      char buf[16];
   1.586 +      if (fscanf(fp, "Hugepagesize: %d", &x) == 1) {
   1.587 +        if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) {
   1.588 +          large_page_size = x * K;
   1.589 +          break;
   1.590 +        }
   1.591 +      } else {
   1.592 +        // skip to next line
   1.593 +        for (;;) {
   1.594 +          int ch = fgetc(fp);
   1.595 +          if (ch == EOF || ch == (int)'\n') break;
   1.596 +        }
   1.597 +      }
   1.598 +    }
   1.599 +    fclose(fp);
   1.600 +  }
   1.601 +
   1.602 +  if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) {
   1.603 +    warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is "
   1.604 +        SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size),
   1.605 +        proper_unit_for_byte_size(large_page_size));
   1.606 +  }
   1.607 +
   1.608 +  return large_page_size;
   1.609 +}
   1.610 +
   1.611 +size_t os::Linux::setup_large_page_size() {
   1.612 +  _large_page_size = Linux::find_large_page_size();
   1.613 +  const size_t default_page_size = (size_t)Linux::page_size();
   1.614 +  if (_large_page_size > default_page_size) {
   1.615 +    _page_sizes[0] = _large_page_size;
   1.616 +    _page_sizes[1] = default_page_size;
   1.617 +    _page_sizes[2] = 0;
   1.618 +  }
   1.619 +
   1.620 +  return _large_page_size;
   1.621 +}
   1.622 +
   1.623 +bool os::Linux::setup_large_page_type(size_t page_size) {
   1.624 +  if (FLAG_IS_DEFAULT(UseHugeTLBFS) &&
   1.625 +      FLAG_IS_DEFAULT(UseSHM) &&
   1.626 +      FLAG_IS_DEFAULT(UseTransparentHugePages)) {
   1.627 +
   1.628 +    // The type of large pages has not been specified by the user.
   1.629 +
   1.630 +    // Try UseHugeTLBFS and then UseSHM.
   1.631 +    UseHugeTLBFS = UseSHM = true;
   1.632 +
   1.633 +    // Don't try UseTransparentHugePages since there are known
   1.634 +    // performance issues with it turned on. This might change in the future.
   1.635 +    UseTransparentHugePages = false;
   1.636 +  }
   1.637 +
   1.638 +  if (UseTransparentHugePages) {
   1.639 +    bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages);
   1.640 +    if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) {
   1.641 +      UseHugeTLBFS = false;
   1.642 +      UseSHM = false;
   1.643 +      return true;
   1.644 +    }
   1.645 +    UseTransparentHugePages = false;
   1.646 +  }
   1.647 +
   1.648 +  if (UseHugeTLBFS) {
   1.649 +    bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
   1.650 +    if (hugetlbfs_sanity_check(warn_on_failure, page_size)) {
   1.651 +      UseSHM = false;
   1.652 +      return true;
   1.653 +    }
   1.654 +    UseHugeTLBFS = false;
   1.655 +  }
   1.656 +
   1.657 +  return UseSHM;
   1.658 +}
   1.659 +
   1.660 +void os::large_page_init() {
   1.661 +  if (!UseLargePages &&
   1.662 +      !UseTransparentHugePages &&
   1.663 +      !UseHugeTLBFS &&
   1.664 +      !UseSHM) {
   1.665 +    // Not using large pages.
   1.666 +    return;
   1.667 +  }
   1.668 +
   1.669 +  if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) {
   1.670 +    // The user explicitly turned off large pages.
   1.671 +    // Ignore the rest of the large pages flags.
   1.672 +    UseTransparentHugePages = false;
   1.673 +    UseHugeTLBFS = false;
   1.674 +    UseSHM = false;
   1.675 +    return;
   1.676 +  }
   1.677 +
   1.678 +  size_t large_page_size = Linux::setup_large_page_size();
   1.679 +  UseLargePages          = Linux::setup_large_page_type(large_page_size);
   1.680 +
   1.681 +  set_coredump_filter();
   1.682 +}
   1.683 +
   1.684 +#ifndef SHM_HUGETLB
   1.685 +#define SHM_HUGETLB 04000
   1.686 +#endif
   1.687 +
   1.688 +#define shm_warning_format(format, ...)              \
   1.689 +  do {                                               \
   1.690 +    if (UseLargePages &&                             \
   1.691 +        (!FLAG_IS_DEFAULT(UseLargePages) ||          \
   1.692 +         !FLAG_IS_DEFAULT(UseSHM) ||                 \
   1.693 +         !FLAG_IS_DEFAULT(LargePageSizeInBytes))) {  \
   1.694 +      warning(format, __VA_ARGS__);                  \
   1.695 +    }                                                \
   1.696 +  } while (0)
   1.697 +
   1.698 +#define shm_warning(str) shm_warning_format("%s", str)
   1.699 +
   1.700 +#define shm_warning_with_errno(str)                \
   1.701 +  do {                                             \
   1.702 +    int err = errno;                               \
   1.703 +    shm_warning_format(str " (error = %d)", err);  \
   1.704 +  } while (0)
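
Note that shm_warning_with_errno() snapshots errno into a local before calling warning(), since warning() itself may perform I/O that clobbers errno. A hypothetical call site, assuming the macros and flags above are in scope:

    #include <sys/ipc.h>
    #include <sys/shm.h>

    // Hypothetical call site: the warning fires only if the user explicitly
    // set -XX:+UseLargePages, -XX:+UseSHM, or -XX:LargePageSizeInBytes=...
    static int reserve_shm_segment(size_t bytes) {
      int shmid = shmget(IPC_PRIVATE, bytes,
                         SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
      if (shmid == -1) {
        shm_warning_with_errno("Failed to reserve shared memory.");
      }
      return shmid;
    }
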
   1.705 +
   1.706 +static char* shmat_with_alignment(int shmid, size_t bytes, size_t alignment) {
   1.707 +  assert(is_size_aligned(bytes, alignment), "Must be divisible by the alignment");
   1.708 +
   1.709 +  if (!is_size_aligned(alignment, SHMLBA)) {
   1.710 +    assert(false, "Code below assumes that alignment is at least SHMLBA aligned");
   1.711 +    return NULL;
   1.712 +  }
   1.713 +
   1.714 +  // To ensure that we get 'alignment' aligned memory from shmat,
   1.715 +  // we pre-reserve aligned virtual memory and then attach to that.
   1.716 +
   1.717 +  char* pre_reserved_addr = anon_mmap_aligned(bytes, alignment, NULL);
   1.718 +  if (pre_reserved_addr == NULL) {
   1.719 +    // Couldn't pre-reserve aligned memory.
   1.720 +    shm_warning("Failed to pre-reserve aligned memory for shmat.");
   1.721 +    return NULL;
   1.722 +  }
   1.723 +
   1.724 +  // SHM_REMAP is needed to allow shmat to map over an existing mapping.
   1.725 +  char* addr = (char*)shmat(shmid, pre_reserved_addr, SHM_REMAP);
   1.726 +
   1.727 +  if ((intptr_t)addr == -1) {
   1.728 +    int err = errno;
   1.729 +    shm_warning_with_errno("Failed to attach shared memory.");
   1.730 +
   1.731 +    assert(err != EACCES, "Unexpected error");
   1.732 +    assert(err != EIDRM,  "Unexpected error");
   1.733 +    assert(err != EINVAL, "Unexpected error");
   1.734 +
   1.735 +    // Since we don't know if the kernel unmapped the pre-reserved memory area
   1.736 +    // we can't unmap it, since that would potentially unmap memory that was
   1.737 +    // mapped from other threads.
   1.738 +    return NULL;
   1.739 +  }
   1.740 +
   1.741 +  return addr;
   1.742 +}
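
The pre-reserve-then-remap pattern is the standard way to get an alignment guarantee that shmat() itself does not offer: first reserve an aligned anonymous mapping, then let shmat() with SHM_REMAP (a Linux extension) replace it in place. A condensed standalone sketch; anon_mmap_aligned() is HotSpot-internal, so the plain mmap() stand-in below is an assumption, and alignment is assumed to be a power of two and a multiple of SHMLBA:

    #include <sys/mman.h>
    #include <sys/shm.h>
    #include <cstdint>

    // Sketch: attach a SysV segment at an 'alignment'-aligned address.
    static char* attach_aligned(int shmid, size_t bytes, size_t alignment) {
      // Over-reserve so an aligned sub-range is guaranteed to exist.
      char* raw = (char*)mmap(NULL, bytes + alignment, PROT_NONE,
                              MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
      if (raw == MAP_FAILED) return NULL;
      char* aligned = (char*)(((uintptr_t)raw + alignment - 1) & ~(alignment - 1));
      // SHM_REMAP lets shmat() map over the placeholder reservation.
      // (A production version would also unmap the unused head and tail of
      // 'raw', which is what anon_mmap_aligned() takes care of.)
      char* addr = (char*)shmat(shmid, aligned, SHM_REMAP);
      return ((intptr_t)addr == -1) ? NULL : addr;
    }
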
   1.743 +
   1.744 +static char* shmat_at_address(int shmid, char* req_addr) {
   1.745 +  if (!is_ptr_aligned(req_addr, SHMLBA)) {
   1.746 +    assert(false, "Requested address needs to be SHMLBA aligned");
   1.747 +    return NULL;
   1.748 +  }
   1.749 +
   1.750 +  char* addr = (char*)shmat(shmid, req_addr, 0);
   1.751 +
   1.752 +  if ((intptr_t)addr == -1) {
   1.753 +    shm_warning_with_errno("Failed to attach shared memory.");
   1.754 +    return NULL;
   1.755 +  }
   1.756 +
   1.757 +  return addr;
   1.758 +}
   1.759 +
   1.760 +static char* shmat_large_pages(int shmid, size_t bytes, size_t alignment, char* req_addr) {
   1.761 +  // If a req_addr has been provided, we assume that the caller has already aligned the address.
   1.762 +  if (req_addr != NULL) {
   1.763 +    assert(is_ptr_aligned(req_addr, os::large_page_size()), "Must be divisible by the large page size");
   1.764 +    assert(is_ptr_aligned(req_addr, alignment), "Must be divisible by given alignment");
   1.765 +    return shmat_at_address(shmid, req_addr);
   1.766 +  }
   1.767 +
   1.768 +  // Since shmid has been setup with SHM_HUGETLB, shmat will automatically
   1.769 +  // return large page size aligned memory addresses when req_addr == NULL.
   1.770 +  // However, if the alignment is larger than the large page size, we have
   1.771 +  // to manually ensure that the memory returned is 'alignment' aligned.
   1.772 +  if (alignment > os::large_page_size()) {
   1.773 +    assert(is_size_aligned(alignment, os::large_page_size()), "Must be divisible by the large page size");
   1.774 +    return shmat_with_alignment(shmid, bytes, alignment);
   1.775 +  } else {
   1.776 +    return shmat_at_address(shmid, NULL);
   1.777 +  }
   1.778 +}
   1.779 +
   1.780 +char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) {
   1.781 +  // "exec" is passed in but not used.  Creating the shared image for
   1.782 +  // the code cache doesn't have an SHM_X executable permission to check.
   1.783 +  assert(UseLargePages && UseSHM, "only for SHM large pages");
   1.784 +  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
   1.785 +  assert(is_ptr_aligned(req_addr, alignment), "Unaligned address");
   1.786 +
   1.787 +  if (!is_size_aligned(bytes, os::large_page_size())) {
   1.788 +    return NULL; // Fallback to small pages.
   1.789 +  }
   1.790 +
   1.791 +  // Create a large shared memory region to attach to based on size.
   1.792 +  // Currently, size is the total size of the heap.
   1.793 +  int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W);
   1.794 +  if (shmid == -1) {
   1.795 +    // Possible reasons for shmget failure:
   1.796 +    // 1. shmmax is too small for Java heap.
   1.797 +    //    > check shmmax value: cat /proc/sys/kernel/shmmax
   1.798 +    //    > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax
   1.799 +    // 2. not enough large page memory.
   1.800 +    //    > check available large pages: cat /proc/meminfo
   1.801 +    //    > increase amount of large pages:
   1.802 +    //          echo new_value > /proc/sys/vm/nr_hugepages
    1.803 +    //      Note 1: different Linux versions may use different names for
    1.804 +    //            this property, e.g. on Redhat AS-3 it is "hugetlb_pool".
    1.805 +    //      Note 2: it's possible there's enough physical memory available,
    1.806 +    //            but it is so fragmented after a long run that it can't
    1.807 +    //            coalesce into large pages. Try to reserve large pages when
    1.808 +    //            the system is still "fresh".
   1.809 +    shm_warning_with_errno("Failed to reserve shared memory.");
   1.810 +    return NULL;
   1.811 +  }
   1.812 +
   1.813 +  // Attach to the region.
   1.814 +  char* addr = shmat_large_pages(shmid, bytes, alignment, req_addr);
   1.815 +
   1.816 +  // Remove shmid. If shmat() is successful, the actual shared memory segment
   1.817 +  // will be deleted when it's detached by shmdt() or when the process
   1.818 +  // terminates. If shmat() is not successful this will remove the shared
   1.819 +  // segment immediately.
   1.820 +  shmctl(shmid, IPC_RMID, NULL);
   1.821 +
   1.822 +  return addr;
   1.823 +}
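
The shmctl(IPC_RMID) call immediately after shmat() is deliberate: marking the segment removed means the kernel destroys it as soon as the last attachment goes away, so the segment cannot leak even if the VM exits abnormally. A minimal sketch of that create/attach/mark-removed lifecycle (illustrative only, without the large-page flags):

    #include <sys/ipc.h>
    #include <sys/shm.h>
    #include <cstdint>

    // Sketch: after IPC_RMID, no explicit cleanup path is needed.
    static void* map_shared(size_t bytes) {
      int shmid = shmget(IPC_PRIVATE, bytes, IPC_CREAT | SHM_R | SHM_W);
      if (shmid == -1) return NULL;
      void* addr = shmat(shmid, NULL, 0);
      // Safe whether or not shmat() succeeded: the segment is freed now if
      // unattached, or when the process detaches/exits otherwise.
      shmctl(shmid, IPC_RMID, NULL);
      return ((intptr_t)addr == -1) ? NULL : addr;
    }
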
   1.824 +
   1.825 +static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) {
   1.826 +  assert(error == ENOMEM, "Only expect to fail if no memory is available");
   1.827 +
   1.828 +  bool warn_on_failure = UseLargePages &&
   1.829 +      (!FLAG_IS_DEFAULT(UseLargePages) ||
   1.830 +       !FLAG_IS_DEFAULT(UseHugeTLBFS) ||
   1.831 +       !FLAG_IS_DEFAULT(LargePageSizeInBytes));
   1.832 +
   1.833 +  if (warn_on_failure) {
   1.834 +    char msg[128];
   1.835 +    jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: "
   1.836 +        PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error);
   1.837 +    warning("%s", msg);
   1.838 +  }
   1.839 +}
   1.840 +
   1.841 +char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) {
   1.842 +  assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages");
   1.843 +  assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size");
   1.844 +  assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address");
   1.845 +
   1.846 +  int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
   1.847 +  char* addr = (char*)::mmap(req_addr, bytes, prot,
   1.848 +                             MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB,
   1.849 +                             -1, 0);
   1.850 +
   1.851 +  if (addr == MAP_FAILED) {
   1.852 +    warn_on_large_pages_failure(req_addr, bytes, errno);
   1.853 +    return NULL;
   1.854 +  }
   1.855 +
   1.856 +  assert(is_ptr_aligned(addr, os::large_page_size()), "Must be");
   1.857 +
   1.858 +  return addr;
   1.859  }
   1.860  
   1.861  // Reserve memory using mmap(MAP_HUGETLB).
