Wed, 27 Aug 2014 08:19:12 -0400
8046598: Scalable Native memory tracking development
Summary: Enhance scalability of native memory tracking
Reviewed-by: coleenp, ctornqvi, gtriantafill
/*
 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"

#include "memory/allocation.inline.hpp"
#include "runtime/atomic.hpp"
#include "services/mallocSiteTable.hpp"

/*
 * Early os::malloc() calls come from initializations of static variables, long before entering any
 * VM code. Upon the arrival of the first os::malloc() call, the malloc site hashtable has to be
 * initialized, along with the allocation site for the hashtable entries.
 * To ensure that the malloc site hashtable can be initialized without triggering any additional
 * os::malloc() call, the hashtable bucket array and the hashtable entry allocation site have to be
 * static.
 * That is not a problem for the hashtable buckets, since they form an array of pointer type: the C
 * runtime just allocates a block of memory for the array and zeroes it.
 * But for the hashtable entry allocation site object, things get tricky. The C runtime not only
 * allocates memory for it, but also calls its constructor at some later time. If we initialized the
 * allocation site at the first os::malloc() call, the object would be reinitialized when its
 * constructor is called by the C runtime.
 * To work around the above issue, we declare a static size_t array with the size of a
 * MallocSiteHashtableEntry; that memory is used to instantiate the MallocSiteHashtableEntry for the
 * hashtable entry allocation site. Given that it is a primitive type array, the C runtime does
 * nothing other than assign the memory block to the variable, which is exactly what we want.
 * The same trick is applied to create the NativeCallStack object used for MallocSiteHashtableEntry
 * memory allocation.
 *
 * Note: C++ objects are usually aligned to a particular alignment, depending on the compiler
 * implementation. We declare the memory as size_t arrays to ensure the memory is aligned to the
 * native machine word alignment.
 */

// Reserve enough memory for NativeCallStack and MallocSiteHashtableEntry objects
size_t MallocSiteTable::_hash_entry_allocation_stack[CALC_OBJ_SIZE_IN_TYPE(NativeCallStack, size_t)];
size_t MallocSiteTable::_hash_entry_allocation_site[CALC_OBJ_SIZE_IN_TYPE(MallocSiteHashtableEntry, size_t)];
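// CALC_OBJ_SIZE_IN_TYPE is assumed to round sizeof(obj) up to a whole number
// of size_t elements, i.e. roughly (sizeof(obj) + sizeof(size_t) - 1) / sizeof(size_t),
// so each array above is at least as large as the object it backs; the
// asserts in initialize() below verify this.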

// Malloc site hashtable buckets
MallocSiteHashtableEntry* MallocSiteTable::_table[MallocSiteTable::table_size];

// Concurrent access counter
volatile int MallocSiteTable::_access_count = 0;

// Tracking hashtable contention
NOT_PRODUCT(int MallocSiteTable::_peak_count = 0;)

/*
 * Initialize the malloc site table.
 * A hashtable entry is malloc'd, so creating one can itself trigger a
 * recursive os::malloc() call. To avoid that infinite recursion, we
 * pre-initialize a hash entry for this allocation site.
 * The method is called during C runtime static variable initialization,
 * which is single-threaded from the JVM's perspective.
 */
bool MallocSiteTable::initialize() {
  assert(sizeof(_hash_entry_allocation_stack) >= sizeof(NativeCallStack), "Sanity Check");
  assert(sizeof(_hash_entry_allocation_site) >= sizeof(MallocSiteHashtableEntry),
    "Sanity Check");
  assert((size_t)table_size <= MAX_MALLOCSITE_TABLE_SIZE, "Hashtable overflow");

  assert(NMT_TrackingStackDepth > 1, "At least one tracking stack");

  // Create a pseudo call stack for the hashtable entry allocation, faking the
  // frames that would lead to new_entry().
  address pc[3];
  if (NMT_TrackingStackDepth >= 3) {
    pc[2] = (address)MallocSiteTable::allocation_at;
  }
  if (NMT_TrackingStackDepth >= 2) {
    pc[1] = (address)MallocSiteTable::lookup_or_add;
  }
  pc[0] = (address)MallocSiteTable::new_entry;

  // Instantiate the NativeCallStack object; placement new has to be used (see comments above)
  NativeCallStack* stack = ::new ((void*)_hash_entry_allocation_stack)
    NativeCallStack(pc, MIN2(((int)(sizeof(pc) / sizeof(address))), ((int)NMT_TrackingStackDepth)));

  // Instantiate the hash entry for the hashtable entry allocation callsite
  MallocSiteHashtableEntry* entry = ::new ((void*)_hash_entry_allocation_site)
    MallocSiteHashtableEntry(*stack);

  // Add the allocation site to the hashtable.
  int index = hash_to_index(stack->hash());
  _table[index] = entry;

  return true;
}

// Walks entries in the hashtable.
// The walk stops if the walker returns false.
bool MallocSiteTable::walk(MallocSiteWalker* walker) {
  MallocSiteHashtableEntry* head;
  for (int index = 0; index < table_size; index ++) {
    head = _table[index];
    while (head != NULL) {
      if (!walker->do_malloc_site(head->peek())) {
        return false;
      }
      head = (MallocSiteHashtableEntry*)head->next();
    }
  }
  return true;
}
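// For illustration only (not part of this changeset): a minimal walker sketch,
// assuming MallocSiteWalker exposes a virtual
// bool do_malloc_site(const MallocSite*) callback matching the head->peek()
// call above. Returning false from the callback stops the walk.
//
//   class CountingWalker : public MallocSiteWalker {
//    private:
//     int _count;
//    public:
//     CountingWalker() : _count(0) { }
//     virtual bool do_malloc_site(const MallocSite* site) {
//       _count++;      // visit this site
//       return true;   // keep walking
//     }
//     int count() const { return _count; }
//   };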

/*
 * The hashtable has no deletion policy for individual entries,
 * and each linked-list node is inserted via compare-and-swap,
 * so each linked list is stable; contention only happens
 * at the end of a linked list.
 * This method should not return NULL under normal circumstances.
 * If NULL is returned, it indicates one of the following:
 * 1. Out of memory: a new hash entry could not be allocated.
 * 2. The hash bucket has overflowed.
 * In either case, the caller should handle the situation.
 */
MallocSite* MallocSiteTable::lookup_or_add(const NativeCallStack& key, size_t* bucket_idx,
  size_t* pos_idx) {
  int index = hash_to_index(key.hash());
  assert(index >= 0, "Negative index");
  *bucket_idx = (size_t)index;
  *pos_idx = 0;

  // First entry for this hash bucket
  if (_table[index] == NULL) {
    MallocSiteHashtableEntry* entry = new_entry(key);
    // OOM check
    if (entry == NULL) return NULL;

    // swap in the head
    if (Atomic::cmpxchg_ptr((void*)entry, (volatile void *)&_table[index], NULL) == NULL) {
      return entry->data();
    }

    delete entry;
  }

  MallocSiteHashtableEntry* head = _table[index];
  while (head != NULL && (*pos_idx) <= MAX_BUCKET_LENGTH) {
    MallocSite* site = head->data();
    if (site->equals(key)) {
      // found matched entry
      return head->data();
    }

    if (head->next() == NULL && (*pos_idx) < MAX_BUCKET_LENGTH) {
      MallocSiteHashtableEntry* entry = new_entry(key);
      // OOM check
      if (entry == NULL) return NULL;
      if (head->atomic_insert(entry)) {
        (*pos_idx) ++;
        return entry->data();
      }
      // contended, other thread won
      delete entry;
    }
    head = (MallocSiteHashtableEntry*)head->next();
    (*pos_idx) ++;
  }
  return NULL;
}
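// Note: entries are never individually deleted and list nodes are appended
// via compare-and-swap, so the (bucket_idx, pos_idx) pair written through the
// out parameters stays a stable coordinate for the returned site; it can be
// passed to malloc_site() below to re-locate the same entry.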

// Access malloc site
MallocSite* MallocSiteTable::malloc_site(size_t bucket_idx, size_t pos_idx) {
  assert(bucket_idx < table_size, "Invalid bucket index");
  MallocSiteHashtableEntry* head = _table[bucket_idx];
  for (size_t index = 0; index < pos_idx && head != NULL;
    index ++, head = (MallocSiteHashtableEntry*)head->next());
  assert(head != NULL, "Invalid position index");
  return head->data();
}

// Allocates MallocSiteHashtableEntry object. Special call stack
// (pre-installed allocation site) has to be used to avoid infinite
// recursion.
MallocSiteHashtableEntry* MallocSiteTable::new_entry(const NativeCallStack& key) {
  void* p = AllocateHeap(sizeof(MallocSiteHashtableEntry), mtNMT,
    *hash_entry_allocation_stack(), AllocFailStrategy::RETURN_NULL);
  return ::new (p) MallocSiteHashtableEntry(key);
}
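// Note: with AllocFailStrategy::RETURN_NULL, p may be NULL. The placement
// new-expression is still safe: placement operator new is non-throwing, so
// the new-expression checks for NULL, skips the constructor, and itself
// evaluates to NULL, which callers of new_entry() test for.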

void MallocSiteTable::reset() {
  for (int index = 0; index < table_size; index ++) {
    MallocSiteHashtableEntry* head = _table[index];
    _table[index] = NULL;
    delete_linked_list(head);
  }
}

void MallocSiteTable::delete_linked_list(MallocSiteHashtableEntry* head) {
  MallocSiteHashtableEntry* p;
  while (head != NULL) {
    p = head;
    head = (MallocSiteHashtableEntry*)head->next();
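    // Skip the pre-initialized entry, which lives in the static
    // _hash_entry_allocation_site buffer rather than on the C heap.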
    if (p != (MallocSiteHashtableEntry*)_hash_entry_allocation_site) {
      delete p;
    }
  }
}

void MallocSiteTable::shutdown() {
  AccessLock locker(&_access_count);
  locker.exclusiveLock();
  reset();
}

bool MallocSiteTable::walk_malloc_site(MallocSiteWalker* walker) {
  assert(walker != NULL, "NULL walker");
  AccessLock locker(&_access_count);
  if (locker.sharedLock()) {
    NOT_PRODUCT(_peak_count = MAX2(_peak_count, _access_count);)
    return walk(walker);
  }
  return false;
}


void MallocSiteTable::AccessLock::exclusiveLock() {
  jint target;
  jint val;

  assert(_lock_state != ExclusiveLock, "Can only call once");
  assert(*_lock >= 0, "Cannot contend for exclusive lock");

  // make counter negative to block out shared locks
  do {
    val = *_lock;
    target = _MAGIC_ + *_lock;
  } while (Atomic::cmpxchg(target, _lock, val) != val);

  // wait for all readers to exit
  while (*_lock != _MAGIC_) {
#ifdef _WINDOWS
    os::naked_short_sleep(1);
#else
    os::NakedYield();
#endif
  }
  _lock_state = ExclusiveLock;
}
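// For illustration only (not part of this changeset): a sketch of the
// counterpart sharedLock(), assuming _MAGIC_ is a large negative constant.
// Readers optimistically increment the counter; a negative result means a
// writer has added _MAGIC_ (above), so the reader backs out.
//
//   bool MallocSiteTable::AccessLock::sharedLock() {
//     jint res = Atomic::add(1, _lock);  // optimistic reader entry
//     if (res < 0) {                     // writer pending or active
//       Atomic::add(-1, _lock);          // back out
//       return false;
//     }
//     _lock_state = SharedLock;
//     return true;
//   }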