Wed, 23 Sep 2020 15:18:53 +0300
8240124: Better VM Interning
Reviewed-by: mbalao, andrew
1.1 --- a/src/share/vm/classfile/altHashing.cpp Sat Sep 12 00:09:03 2020 +0300 1.2 +++ b/src/share/vm/classfile/altHashing.cpp Wed Sep 23 15:18:53 2020 +0300 1.3 @@ -1,5 +1,5 @@ 1.4 /* 1.5 - * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. 1.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 * 1.9 * This code is free software; you can redistribute it and/or modify it 1.10 @@ -22,12 +22,29 @@ 1.11 * 1.12 */ 1.13 1.14 +/* 1.15 + * halfsiphash code adapted from reference implementation 1.16 + * (https://github.com/veorq/SipHash/blob/master/halfsiphash.c) 1.17 + * which is distributed with the following copyright: 1.18 + * 1.19 + * SipHash reference C implementation 1.20 + * 1.21 + * Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> 1.22 + * 1.23 + * To the extent possible under law, the author(s) have dedicated all copyright 1.24 + * and related and neighboring rights to this software to the public domain 1.25 + * worldwide. This software is distributed without any warranty. 1.26 + * 1.27 + * You should have received a copy of the CC0 Public Domain Dedication along 1.28 + * with this software. If not, see 1.29 + * <http://creativecommons.org/publicdomain/zero/1.0/>. 1.30 + */ 1.31 + 1.32 #include "precompiled.hpp" 1.33 #include "classfile/altHashing.hpp" 1.34 -#include "classfile/symbolTable.hpp" 1.35 #include "classfile/systemDictionary.hpp" 1.36 #include "oops/markOop.hpp" 1.37 -#include "runtime/thread.hpp" 1.38 +#include "runtime/os.hpp" 1.39 1.40 // Get the hash code of the classes mirror if it exists, otherwise just 1.41 // return a random number, which is one of the possible hash code used for 1.42 @@ -39,266 +56,284 @@ 1.43 } 1.44 1.45 // Seed value used for each alternative hash calculated. 1.46 -juint AltHashing::compute_seed() { 1.47 - jlong nanos = os::javaTimeNanos(); 1.48 - jlong now = os::javaTimeMillis(); 1.49 - int SEED_MATERIAL[8] = { 1.50 - (int) object_hash(SystemDictionary::String_klass()), 1.51 - (int) object_hash(SystemDictionary::System_klass()), 1.52 - (int) os::random(), // current thread isn't a java thread 1.53 - (int) (((julong)nanos) >> 32), 1.54 - (int) nanos, 1.55 - (int) (((julong)now) >> 32), 1.56 - (int) now, 1.57 - (int) (os::javaTimeNanos() >> 2) 1.58 +uint64_t AltHashing::compute_seed() { 1.59 + uint64_t nanos = os::javaTimeNanos(); 1.60 + uint64_t now = os::javaTimeMillis(); 1.61 + uint32_t SEED_MATERIAL[8] = { 1.62 + (uint32_t) object_hash(SystemDictionary::String_klass()), 1.63 + (uint32_t) object_hash(SystemDictionary::System_klass()), 1.64 + (uint32_t) os::random(), // current thread isn't a java thread 1.65 + (uint32_t) (((uint64_t)nanos) >> 32), 1.66 + (uint32_t) nanos, 1.67 + (uint32_t) (((uint64_t)now) >> 32), 1.68 + (uint32_t) now, 1.69 + (uint32_t) (os::javaTimeNanos() >> 2) 1.70 }; 1.71 1.72 - return murmur3_32(SEED_MATERIAL, 8); 1.73 + return halfsiphash_64(SEED_MATERIAL, 8); 1.74 } 1.75 1.76 +// utility function copied from java/lang/Integer 1.77 +static uint32_t Integer_rotateLeft(uint32_t i, int distance) { 1.78 + return (i << distance) | (i >> (32 - distance)); 1.79 +} 1.80 1.81 -// Murmur3 hashing for Symbol 1.82 -juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) { 1.83 - juint h1 = seed; 1.84 +static void halfsiphash_rounds(uint32_t v[4], int rounds) { 1.85 + while (rounds-- > 0) { 1.86 + v[0] += v[1]; 1.87 + v[1] = Integer_rotateLeft(v[1], 5); 1.88 + v[1] ^= v[0]; 1.89 + v[0] = Integer_rotateLeft(v[0], 16); 1.90 + v[2] += v[3]; 1.91 + v[3] = Integer_rotateLeft(v[3], 8); 1.92 + v[3] ^= v[2]; 1.93 + v[0] += v[3]; 1.94 + v[3] = Integer_rotateLeft(v[3], 7); 1.95 + v[3] ^= v[0]; 1.96 + v[2] += v[1]; 1.97 + v[1] = Integer_rotateLeft(v[1], 13); 1.98 + v[1] ^= v[2]; 1.99 + v[2] = Integer_rotateLeft(v[2], 16); 1.100 + } 1.101 + } 1.102 + 1.103 +static void halfsiphash_adddata(uint32_t v[4], uint32_t newdata, int rounds) { 1.104 + v[3] ^= newdata; 1.105 + halfsiphash_rounds(v, rounds); 1.106 + v[0] ^= newdata; 1.107 +} 1.108 + 1.109 +static void halfsiphash_init32(uint32_t v[4], uint64_t seed) { 1.110 + v[0] = seed & 0xffffffff; 1.111 + v[1] = seed >> 32; 1.112 + v[2] = 0x6c796765 ^ v[0]; 1.113 + v[3] = 0x74656462 ^ v[1]; 1.114 +} 1.115 + 1.116 +static void halfsiphash_init64(uint32_t v[4], uint64_t seed) { 1.117 + halfsiphash_init32(v, seed); 1.118 + v[1] ^= 0xee; 1.119 +} 1.120 + 1.121 +static uint64_t halfsiphash_finish64(uint32_t v[4], int rounds) { 1.122 + uint64_t rv; 1.123 + v[2] ^= 0xee; 1.124 + halfsiphash_rounds(v, rounds); 1.125 + rv = v[1] ^ v[3]; 1.126 + v[1] ^= 0xdd; 1.127 + halfsiphash_rounds(v, rounds); 1.128 + rv |= (uint64_t)(v[1] ^ v[3]) << 32; 1.129 + return rv; 1.130 +} 1.131 + 1.132 +// HalfSipHash-2-4 (64-bit output) for Symbols 1.133 +uint64_t AltHashing::halfsiphash_64(uint64_t seed, const int8_t* data, int len) { 1.134 + uint32_t v[4]; 1.135 + uint32_t newdata; 1.136 + int off = 0; 1.137 int count = len; 1.138 - int offset = 0; 1.139 1.140 + halfsiphash_init64(v, seed); 1.141 // body 1.142 while (count >= 4) { 1.143 - juint k1 = (data[offset] & 0x0FF) 1.144 - | (data[offset + 1] & 0x0FF) << 8 1.145 - | (data[offset + 2] & 0x0FF) << 16 1.146 - | data[offset + 3] << 24; 1.147 + // Avoid sign extension with 0x0ff 1.148 + newdata = (data[off] & 0x0FF) 1.149 + | (data[off + 1] & 0x0FF) << 8 1.150 + | (data[off + 2] & 0x0FF) << 16 1.151 + | data[off + 3] << 24; 1.152 1.153 count -= 4; 1.154 - offset += 4; 1.155 + off += 4; 1.156 1.157 - k1 *= 0xcc9e2d51; 1.158 - k1 = Integer_rotateLeft(k1, 15); 1.159 - k1 *= 0x1b873593; 1.160 - 1.161 - h1 ^= k1; 1.162 - h1 = Integer_rotateLeft(h1, 13); 1.163 - h1 = h1 * 5 + 0xe6546b64; 1.164 + halfsiphash_adddata(v, newdata, 2); 1.165 } 1.166 1.167 // tail 1.168 + newdata = ((uint32_t)len) << 24; // (Byte.SIZE / Byte.SIZE); 1.169 1.170 if (count > 0) { 1.171 - juint k1 = 0; 1.172 - 1.173 switch (count) { 1.174 case 3: 1.175 - k1 ^= (data[offset + 2] & 0xff) << 16; 1.176 + newdata |= (data[off + 2] & 0x0ff) << 16; 1.177 // fall through 1.178 case 2: 1.179 - k1 ^= (data[offset + 1] & 0xff) << 8; 1.180 + newdata |= (data[off + 1] & 0x0ff) << 8; 1.181 // fall through 1.182 case 1: 1.183 - k1 ^= (data[offset] & 0xff); 1.184 + newdata |= (data[off] & 0x0ff); 1.185 // fall through 1.186 - default: 1.187 - k1 *= 0xcc9e2d51; 1.188 - k1 = Integer_rotateLeft(k1, 15); 1.189 - k1 *= 0x1b873593; 1.190 - h1 ^= k1; 1.191 } 1.192 } 1.193 1.194 + halfsiphash_adddata(v, newdata, 2); 1.195 + 1.196 // finalization 1.197 - h1 ^= len; 1.198 - 1.199 - // finalization mix force all bits of a hash block to avalanche 1.200 - h1 ^= h1 >> 16; 1.201 - h1 *= 0x85ebca6b; 1.202 - h1 ^= h1 >> 13; 1.203 - h1 *= 0xc2b2ae35; 1.204 - h1 ^= h1 >> 16; 1.205 - 1.206 - return h1; 1.207 + return halfsiphash_finish64(v, 4); 1.208 } 1.209 1.210 -// Murmur3 hashing for Strings 1.211 -juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) { 1.212 - juint h1 = seed; 1.213 - 1.214 +// HalfSipHash-2-4 (64-bit output) for Strings 1.215 +uint64_t AltHashing::halfsiphash_64(uint64_t seed, const uint16_t* data, int len) { 1.216 + uint32_t v[4]; 1.217 + uint32_t newdata; 1.218 int off = 0; 1.219 int count = len; 1.220 1.221 + halfsiphash_init64(v, seed); 1.222 + 1.223 // body 1.224 while (count >= 2) { 1.225 - jchar d1 = data[off++] & 0xFFFF; 1.226 - jchar d2 = data[off++]; 1.227 - juint k1 = (d1 | d2 << 16); 1.228 + uint16_t d1 = data[off++] & 0x0FFFF; 1.229 + uint16_t d2 = data[off++]; 1.230 + newdata = (d1 | d2 << 16); 1.231 1.232 count -= 2; 1.233 1.234 - k1 *= 0xcc9e2d51; 1.235 - k1 = Integer_rotateLeft(k1, 15); 1.236 - k1 *= 0x1b873593; 1.237 - 1.238 - h1 ^= k1; 1.239 - h1 = Integer_rotateLeft(h1, 13); 1.240 - h1 = h1 * 5 + 0xe6546b64; 1.241 + halfsiphash_adddata(v, newdata, 2); 1.242 } 1.243 1.244 // tail 1.245 - 1.246 + newdata = ((uint32_t)len * 2) << 24; // (Character.SIZE / Byte.SIZE); 1.247 if (count > 0) { 1.248 - juint k1 = (juint)data[off]; 1.249 - 1.250 - k1 *= 0xcc9e2d51; 1.251 - k1 = Integer_rotateLeft(k1, 15); 1.252 - k1 *= 0x1b873593; 1.253 - h1 ^= k1; 1.254 + newdata |= (uint32_t)data[off]; 1.255 } 1.256 + halfsiphash_adddata(v, newdata, 2); 1.257 1.258 // finalization 1.259 - h1 ^= len * 2; // (Character.SIZE / Byte.SIZE); 1.260 - 1.261 - // finalization mix force all bits of a hash block to avalanche 1.262 - h1 ^= h1 >> 16; 1.263 - h1 *= 0x85ebca6b; 1.264 - h1 ^= h1 >> 13; 1.265 - h1 *= 0xc2b2ae35; 1.266 - h1 ^= h1 >> 16; 1.267 - 1.268 - return h1; 1.269 + return halfsiphash_finish64(v, 4); 1.270 } 1.271 1.272 -// Hash used for the seed. 1.273 -juint AltHashing::murmur3_32(juint seed, const int* data, int len) { 1.274 - juint h1 = seed; 1.275 +// HalfSipHash-2-4 (64-bit output) for integers (used to create seed) 1.276 +uint64_t AltHashing::halfsiphash_64(uint64_t seed, const uint32_t* data, int len) { 1.277 + uint32_t v[4]; 1.278 1.279 int off = 0; 1.280 int end = len; 1.281 1.282 + halfsiphash_init64(v, seed); 1.283 + 1.284 // body 1.285 while (off < end) { 1.286 - juint k1 = (juint)data[off++]; 1.287 - 1.288 - k1 *= 0xcc9e2d51; 1.289 - k1 = Integer_rotateLeft(k1, 15); 1.290 - k1 *= 0x1b873593; 1.291 - 1.292 - h1 ^= k1; 1.293 - h1 = Integer_rotateLeft(h1, 13); 1.294 - h1 = h1 * 5 + 0xe6546b64; 1.295 + halfsiphash_adddata(v, (uint32_t)data[off++], 2); 1.296 } 1.297 1.298 // tail (always empty, as body is always 32-bit chunks) 1.299 1.300 // finalization 1.301 - 1.302 - h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE); 1.303 - 1.304 - // finalization mix force all bits of a hash block to avalanche 1.305 - h1 ^= h1 >> 16; 1.306 - h1 *= 0x85ebca6b; 1.307 - h1 ^= h1 >> 13; 1.308 - h1 *= 0xc2b2ae35; 1.309 - h1 ^= h1 >> 16; 1.310 - 1.311 - return h1; 1.312 + halfsiphash_adddata(v, ((uint32_t)len * 4) << 24, 2); // (Integer.SIZE / Byte.SIZE); 1.313 + return halfsiphash_finish64(v, 4); 1.314 } 1.315 1.316 -juint AltHashing::murmur3_32(const int* data, int len) { 1.317 - return murmur3_32(0, data, len); 1.318 +// HalfSipHash-2-4 (64-bit output) for integers (used to create seed) 1.319 +uint64_t AltHashing::halfsiphash_64(const uint32_t* data, int len) { 1.320 + return halfsiphash_64((uint64_t)0, data, len); 1.321 } 1.322 1.323 #ifndef PRODUCT 1.324 -// Overloaded versions for internal test. 1.325 -juint AltHashing::murmur3_32(const jbyte* data, int len) { 1.326 - return murmur3_32(0, data, len); 1.327 -} 1.328 + void AltHashing::testHalfsiphash_64_ByteArray() { 1.329 + // printf("testHalfsiphash_64_CharArray\n"); 1.330 + const int factor = 4; 1.331 1.332 -juint AltHashing::murmur3_32(const jchar* data, int len) { 1.333 - return murmur3_32(0, data, len); 1.334 -} 1.335 + int8_t vector[256]; 1.336 + int8_t hashes[factor * 256]; 1.337 1.338 -// Internal test for alternate hashing. Translated from JDK version 1.339 -// test/sun/misc/Hashing.java 1.340 -static const jbyte ONE_BYTE[] = { (jbyte) 0x80}; 1.341 -static const jbyte TWO_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81}; 1.342 -static const jchar ONE_CHAR[] = { (jchar) 0x8180}; 1.343 -static const jbyte THREE_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82}; 1.344 -static const jbyte FOUR_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83}; 1.345 -static const jchar TWO_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382}; 1.346 -static const jint ONE_INT[] = { (jint) 0x83828180}; 1.347 -static const jbyte SIX_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85}; 1.348 -static const jchar THREE_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382, (jchar) 0x8584}; 1.349 -static const jbyte EIGHT_BYTE[] = { 1.350 - (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, 1.351 - (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85, 1.352 - (jbyte) 0x86, (jbyte) 0x87}; 1.353 -static const jchar FOUR_CHAR[] = { 1.354 - (jchar) 0x8180, (jchar) 0x8382, 1.355 - (jchar) 0x8584, (jchar) 0x8786}; 1.356 + for (int i = 0; i < 256; i++) { 1.357 + vector[i] = (int8_t) i; 1.358 + } 1.359 1.360 -static const jint TWO_INT[] = { (jint) 0x83828180, (jint) 0x87868584}; 1.361 + // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} 1.362 + for (int i = 0; i < 256; i++) { 1.363 + uint64_t hash = AltHashing::halfsiphash_64(256 - i, vector, i); 1.364 + hashes[i * factor] = (int8_t) hash; 1.365 + hashes[i * factor + 1] = (int8_t)(hash >> 8); 1.366 + hashes[i * factor + 2] = (int8_t)(hash >> 16); 1.367 + hashes[i * factor + 3] = (int8_t)(hash >> 24); 1.368 + } 1.369 1.370 -static const juint MURMUR3_32_X86_CHECK_VALUE = 0xB0F57EE3; 1.371 + // hash to get const result. 1.372 + uint64_t final_hash = AltHashing::halfsiphash_64(0, hashes, factor*256); 1.373 1.374 -void AltHashing::testMurmur3_32_ByteArray() { 1.375 - // printf("testMurmur3_32_ByteArray\n"); 1.376 + // Value found using reference implementation for the hashes array. 1.377 + // halfsiphash((const uint8_t*)hashes, factor*256, (const uint8_t *)&k, 1.378 + // (uint8_t*)&reference, 8); 1.379 1.380 - jbyte vector[256]; 1.381 - jbyte hashes[4 * 256]; 1.382 + static const uint64_t HALFSIPHASH_64_BYTE_CHECK_VALUE = 0x15a7911e30917ee8; 1.383 1.384 - for (int i = 0; i < 256; i++) { 1.385 - vector[i] = (jbyte) i; 1.386 + assert (HALFSIPHASH_64_BYTE_CHECK_VALUE == final_hash, 1.387 + err_msg( 1.388 + "Calculated hash result not as expected. Expected " UINT64_FORMAT " got " UINT64_FORMAT, 1.389 + HALFSIPHASH_64_BYTE_CHECK_VALUE, 1.390 + final_hash)); 1.391 } 1.392 1.393 - // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} 1.394 - for (int i = 0; i < 256; i++) { 1.395 - juint hash = murmur3_32(256 - i, vector, i); 1.396 - hashes[i * 4] = (jbyte) hash; 1.397 - hashes[i * 4 + 1] = (jbyte)(hash >> 8); 1.398 - hashes[i * 4 + 2] = (jbyte)(hash >> 16); 1.399 - hashes[i * 4 + 3] = (jbyte)(hash >> 24); 1.400 + void AltHashing::testHalfsiphash_64_CharArray() { 1.401 + // printf("testHalfsiphash_64_CharArray\n"); 1.402 + const int factor = 2; 1.403 + 1.404 + uint16_t vector[256]; 1.405 + uint16_t hashes[factor * 256]; 1.406 + 1.407 + for (int i = 0; i < 256; i++) { 1.408 + vector[i] = (uint16_t) i; 1.409 + } 1.410 + 1.411 + // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} 1.412 + for (int i = 0; i < 256; i++) { 1.413 + uint64_t hash = AltHashing::halfsiphash_64(256 - i, vector, i); 1.414 + hashes[i * factor] = (uint16_t) hash; 1.415 + hashes[i * factor + 1] = (uint16_t)(hash >> 16); 1.416 + } 1.417 + 1.418 + // hash to get const result. 1.419 + uint64_t final_hash = AltHashing::halfsiphash_64(0, hashes, factor*256); 1.420 + 1.421 + // Value found using reference implementation for the hashes array. 1.422 + // halfsiphash((const uint8_t*)hashes, 2*factor*256, (const uint8_t *)&k, 1.423 + // (uint8_t*)&reference, 8); 1.424 + static const uint64_t HALFSIPHASH_64_CHAR_CHECK_VALUE = 0xf392d8a6a9e24103; 1.425 + 1.426 + assert(HALFSIPHASH_64_CHAR_CHECK_VALUE == final_hash, 1.427 + err_msg( 1.428 + "Calculated hash result not as expected. Expected " UINT64_FORMAT " got " UINT64_FORMAT, 1.429 + HALFSIPHASH_64_CHAR_CHECK_VALUE, 1.430 + final_hash)); 1.431 } 1.432 1.433 - // hash to get const result. 1.434 - juint final_hash = murmur3_32(hashes, 4*256); 1.435 + // Test against sample hashes published with the reference implementation: 1.436 + // https://github.com/veorq/SipHash 1.437 + void AltHashing::testHalfsiphash_64_FromReference() { 1.438 + // printf("testHalfsiphash_64_FromReference\n"); 1.439 1.440 - assert (MURMUR3_32_X86_CHECK_VALUE == final_hash, 1.441 - err_msg( 1.442 - "Calculated hash result not as expected. Expected %08X got %08X\n", 1.443 - MURMUR3_32_X86_CHECK_VALUE, 1.444 - final_hash)); 1.445 -} 1.446 + const uint64_t seed = 0x0706050403020100; 1.447 + const uint64_t results[16] = { 1.448 + 0xc83cb8b9591f8d21, 0xa12ee55b178ae7d5, 1.449 + 0x8c85e4bc20e8feed, 0x99c7f5ae9f1fc77b, 1.450 + 0xb5f37b5fd2aa3673, 0xdba7ee6f0a2bf51b, 1.451 + 0xf1a63fae45107470, 0xb516001efb5f922d, 1.452 + 0x6c6211d8469d7028, 0xdc7642ec407ad686, 1.453 + 0x4caec8671cc8385b, 0x5ab1dc27adf3301e, 1.454 + 0x3e3ea94bc0a8eaa9, 0xe150f598795a4402, 1.455 + 0x1d5ff142f992a4a1, 0x60e426bf902876d6 1.456 + }; 1.457 + uint32_t vector[16]; 1.458 1.459 -void AltHashing::testEquivalentHashes() { 1.460 - juint jbytes, jchars, ints; 1.461 + for (int i = 0; i < 16; i++) 1.462 + vector[i] = 0x03020100 + i * 0x04040404; 1.463 1.464 - // printf("testEquivalentHashes\n"); 1.465 + for (int i = 0; i < 16; i++) { 1.466 + uint64_t hash = AltHashing::halfsiphash_64(seed, vector, i); 1.467 + assert(results[i] == hash, 1.468 + err_msg( 1.469 + "Calculated hash result not as expected. Round %d: " 1.470 + "Expected " UINT64_FORMAT_X " got " UINT64_FORMAT_X "\n", 1.471 + i, 1.472 + results[i], 1.473 + hash)); 1.474 + } 1.475 + } 1.476 1.477 - jbytes = murmur3_32(TWO_BYTE, 2); 1.478 - jchars = murmur3_32(ONE_CHAR, 1); 1.479 - assert (jbytes == jchars, 1.480 - err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars)); 1.481 - 1.482 - jbytes = murmur3_32(FOUR_BYTE, 4); 1.483 - jchars = murmur3_32(TWO_CHAR, 2); 1.484 - ints = murmur3_32(ONE_INT, 1); 1.485 - assert ((jbytes == jchars) && (jbytes == ints), 1.486 - err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints)); 1.487 - 1.488 - jbytes = murmur3_32(SIX_BYTE, 6); 1.489 - jchars = murmur3_32(THREE_CHAR, 3); 1.490 - assert (jbytes == jchars, 1.491 - err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars)); 1.492 - 1.493 - jbytes = murmur3_32(EIGHT_BYTE, 8); 1.494 - jchars = murmur3_32(FOUR_CHAR, 4); 1.495 - ints = murmur3_32(TWO_INT, 2); 1.496 - assert ((jbytes == jchars) && (jbytes == ints), 1.497 - err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints)); 1.498 -} 1.499 - 1.500 -// Returns true if the alternate hashcode is correct 1.501 void AltHashing::test_alt_hash() { 1.502 - testMurmur3_32_ByteArray(); 1.503 - testEquivalentHashes(); 1.504 + testHalfsiphash_64_ByteArray(); 1.505 + testHalfsiphash_64_CharArray(); 1.506 + testHalfsiphash_64_FromReference(); 1.507 } 1.508 #endif // PRODUCT
2.1 --- a/src/share/vm/classfile/altHashing.hpp Sat Sep 12 00:09:03 2020 +0300 2.2 +++ b/src/share/vm/classfile/altHashing.hpp Wed Sep 23 15:18:53 2020 +0300 2.3 @@ -26,37 +26,30 @@ 2.4 #define SHARE_VM_CLASSFILE_ALTHASHING_HPP 2.5 2.6 #include "prims/jni.h" 2.7 -#include "classfile/symbolTable.hpp" 2.8 +#include "memory/allocation.hpp" 2.9 2.10 /** 2.11 - * Hashing utilities. 2.12 - * 2.13 - * Implementation of Murmur3 hashing. 2.14 - * This code was translated from src/share/classes/sun/misc/Hashing.java 2.15 - * code in the JDK. 2.16 + * Implementation of alternate more secure hashing. 2.17 */ 2.18 2.19 class AltHashing : AllStatic { 2.20 2.21 - // utility function copied from java/lang/Integer 2.22 - static juint Integer_rotateLeft(juint i, int distance) { 2.23 - return (i << distance) | (i >> (32-distance)); 2.24 - } 2.25 - static juint murmur3_32(const int* data, int len); 2.26 - static juint murmur3_32(juint seed, const int* data, int len); 2.27 + // For the seed computation 2.28 + static uint64_t halfsiphash_64(const uint32_t* data, int len); 2.29 + static uint64_t halfsiphash_64(uint64_t seed, const uint32_t* data, int len); 2.30 + #ifndef PRODUCT 2.31 + // Hashing functions used for internal testing 2.32 + static void testHalfsiphash_64_ByteArray(); 2.33 + static void testHalfsiphash_64_CharArray(); 2.34 + static void testHalfsiphash_64_FromReference(); 2.35 + #endif // PRODUCT 2.36 + public: 2.37 + static uint64_t compute_seed(); 2.38 2.39 -#ifndef PRODUCT 2.40 - // Hashing functions used for internal testing 2.41 - static juint murmur3_32(const jbyte* data, int len); 2.42 - static juint murmur3_32(const jchar* data, int len); 2.43 - static void testMurmur3_32_ByteArray(); 2.44 - static void testEquivalentHashes(); 2.45 -#endif // PRODUCT 2.46 - 2.47 - public: 2.48 - static juint compute_seed(); 2.49 - static juint murmur3_32(juint seed, const jbyte* data, int len); 2.50 - static juint murmur3_32(juint seed, const jchar* data, int len); 2.51 + // For Symbols 2.52 + static uint64_t halfsiphash_64(uint64_t seed, const int8_t* data, int len); 2.53 + // For Strings 2.54 + static uint64_t halfsiphash_64(uint64_t seed, const uint16_t* data, int len); 2.55 NOT_PRODUCT(static void test_alt_hash();) 2.56 }; 2.57 #endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP
3.1 --- a/src/share/vm/classfile/symbolTable.cpp Sat Sep 12 00:09:03 2020 +0300 3.2 +++ b/src/share/vm/classfile/symbolTable.cpp Wed Sep 23 15:18:53 2020 +0300 3.3 @@ -1,5 +1,5 @@ 3.4 /* 3.5 - * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. 3.6 + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 3.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3.8 * 3.9 * This code is free software; you can redistribute it and/or modify it 3.10 @@ -224,7 +224,7 @@ 3.11 // Pick hashing algorithm. 3.12 unsigned int SymbolTable::hash_symbol(const char* s, int len) { 3.13 return use_alternate_hashcode() ? 3.14 - AltHashing::murmur3_32(seed(), (const jbyte*)s, len) : 3.15 + AltHashing::halfsiphash_64(seed(), (const int8_t*)s, len) : 3.16 java_lang_String::hash_code(s, len); 3.17 } 3.18 3.19 @@ -650,7 +650,7 @@ 3.20 3.21 // Pick hashing algorithm 3.22 unsigned int StringTable::hash_string(const jchar* s, int len) { 3.23 - return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) : 3.24 + return use_alternate_hashcode() ? AltHashing::halfsiphash_64(seed(), s, len) : 3.25 java_lang_String::hash_code(s, len); 3.26 } 3.27
4.1 --- a/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp Sat Sep 12 00:09:03 2020 +0300 4.2 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp Wed Sep 23 15:18:53 2020 +0300 4.3 @@ -150,7 +150,7 @@ 4.4 assert(worker_id < _nlists, "Invalid worker id"); 4.5 4.6 entry->set_obj(NULL); 4.7 - entry->set_hash(0); 4.8 + entry->set_java_hash(0); 4.9 4.10 if (_cached[worker_id].length() < _max_list_length) { 4.11 // Cache is not full 4.12 @@ -215,7 +215,7 @@ 4.13 uintx G1StringDedupTable::_resize_count = 0; 4.14 uintx G1StringDedupTable::_rehash_count = 0; 4.15 4.16 -G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) : 4.17 +G1StringDedupTable::G1StringDedupTable(size_t size, uint64_t hash_seed) : 4.18 _size(size), 4.19 _entries(0), 4.20 _grow_threshold((uintx)(size * _grow_load_factor)), 4.21 @@ -237,10 +237,14 @@ 4.22 _table = new G1StringDedupTable(_min_size); 4.23 } 4.24 4.25 -void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) { 4.26 +void G1StringDedupTable::add(typeArrayOop value, uint64_t hash, G1StringDedupEntry** list) { 4.27 G1StringDedupEntry* entry = _entry_cache->alloc(); 4.28 entry->set_obj(value); 4.29 - entry->set_hash(hash); 4.30 + if (use_java_hash()) { 4.31 + entry->set_java_hash((unsigned int)hash); 4.32 + } else { 4.33 + entry->set_alt_hash(hash); 4.34 + } 4.35 entry->set_next(*list); 4.36 *list = entry; 4.37 _entries++; 4.38 @@ -255,7 +259,7 @@ 4.39 void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) { 4.40 G1StringDedupEntry* entry = *pentry; 4.41 *pentry = entry->next(); 4.42 - unsigned int hash = entry->hash(); 4.43 + uint64_t hash = use_java_hash() ? entry->java_hash() : entry->alt_hash(); 4.44 size_t index = dest->hash_to_index(hash); 4.45 G1StringDedupEntry** list = dest->bucket(index); 4.46 entry->set_next(*list); 4.47 @@ -270,10 +274,10 @@ 4.48 value1->length() * sizeof(jchar))))); 4.49 } 4.50 4.51 -typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash, 4.52 +typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, uint64_t hash, 4.53 G1StringDedupEntry** list, uintx &count) { 4.54 for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) { 4.55 - if (entry->hash() == hash) { 4.56 + if ((use_java_hash() ? entry->java_hash() : entry->alt_hash()) == hash) { 4.57 typeArrayOop existing_value = entry->obj(); 4.58 if (equals(value, existing_value)) { 4.59 // Match found 4.60 @@ -287,7 +291,7 @@ 4.61 return NULL; 4.62 } 4.63 4.64 -typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) { 4.65 +typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, uint64_t hash) { 4.66 size_t index = hash_to_index(hash); 4.67 G1StringDedupEntry** list = bucket(index); 4.68 uintx count = 0; 4.69 @@ -311,20 +315,25 @@ 4.70 return existing_value; 4.71 } 4.72 4.73 -unsigned int G1StringDedupTable::hash_code(typeArrayOop value) { 4.74 +unsigned int G1StringDedupTable::java_hash_code(typeArrayOop value) { 4.75 + assert(use_java_hash(), "Should not use java hash code"); 4.76 unsigned int hash; 4.77 int length = value->length(); 4.78 const jchar* data = (jchar*)value->base(T_CHAR); 4.79 4.80 - if (use_java_hash()) { 4.81 - hash = java_lang_String::hash_code(data, length); 4.82 - } else { 4.83 - hash = AltHashing::murmur3_32(_table->_hash_seed, data, length); 4.84 - } 4.85 + hash = java_lang_String::hash_code(data, length); 4.86 4.87 return hash; 4.88 } 4.89 4.90 +uint64_t G1StringDedupTable::alt_hash_code(typeArrayOop value) { 4.91 + assert(!use_java_hash(), "Should not use alt hash code"); 4.92 + 4.93 + int length = value->length(); 4.94 + const jbyte* data = (jbyte*)value->base(T_BYTE); 4.95 + return AltHashing::halfsiphash_64(_table->_hash_seed, (const int8_t*)data, length); 4.96 +} 4.97 + 4.98 void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) { 4.99 assert(java_lang_String::is_instance(java_string), "Must be a string"); 4.100 No_Safepoint_Verifier nsv; 4.101 @@ -338,7 +347,7 @@ 4.102 return; 4.103 } 4.104 4.105 - unsigned int hash = 0; 4.106 + uint64_t hash = 0; 4.107 4.108 if (use_java_hash()) { 4.109 // Get hash code from cache 4.110 @@ -347,7 +356,7 @@ 4.111 4.112 if (hash == 0) { 4.113 // Compute hash 4.114 - hash = hash_code(value); 4.115 + hash = alt_hash_code(value); 4.116 stat.inc_hashed(); 4.117 } 4.118 4.119 @@ -501,8 +510,9 @@ 4.120 // destination partitions. finish_rehash() will do a single 4.121 // threaded transfer of all entries. 4.122 typeArrayOop value = (typeArrayOop)*p; 4.123 - unsigned int hash = hash_code(value); 4.124 - (*entry)->set_hash(hash); 4.125 + assert(!use_java_hash(), "Should not be using Java hash"); 4.126 + uint64_t hash = alt_hash_code(value); 4.127 + (*entry)->set_alt_hash(hash); 4.128 } 4.129 4.130 // Move to next entry 4.131 @@ -565,8 +575,14 @@ 4.132 guarantee(Universe::heap()->is_in_reserved(value), "Object must be on the heap"); 4.133 guarantee(!value->is_forwarded(), "Object must not be forwarded"); 4.134 guarantee(value->is_typeArray(), "Object must be a typeArrayOop"); 4.135 - unsigned int hash = hash_code(value); 4.136 - guarantee((*entry)->hash() == hash, "Table entry has inorrect hash"); 4.137 + uint64_t hash; 4.138 + if (use_java_hash()) { 4.139 + hash = (*entry)->java_hash(); 4.140 + guarantee(java_hash_code(value) == hash, "Table entry has incorrect hash"); 4.141 + } else { 4.142 + hash = (*entry)->alt_hash(); 4.143 + guarantee(alt_hash_code(value) == hash, "Table entry has incorrect hash"); 4.144 + } 4.145 guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index"); 4.146 entry = (*entry)->next_addr(); 4.147 } 4.148 @@ -581,7 +597,10 @@ 4.149 G1StringDedupEntry** entry2 = (*entry1)->next_addr(); 4.150 while (*entry2 != NULL) { 4.151 typeArrayOop value2 = (*entry2)->obj(); 4.152 - guarantee(!equals(value1, value2), "Table entries must not have identical arrays"); 4.153 + guarantee(value1 != value2, "Table entries must not have the same array"); 4.154 + if (use_java_hash()) { 4.155 + guarantee(!equals(value1, value2), "Table entries must not have identical arrays"); 4.156 + } 4.157 entry2 = (*entry2)->next_addr(); 4.158 } 4.159 entry1 = (*entry1)->next_addr(); 4.160 @@ -600,7 +619,7 @@ 4.161 " [Size: " SIZE_FORMAT ", Min: " SIZE_FORMAT ", Max: " SIZE_FORMAT "]\n" 4.162 " [Entries: " UINTX_FORMAT ", Load: " G1_STRDEDUP_PERCENT_FORMAT_NS ", Cached: " UINTX_FORMAT ", Added: " UINTX_FORMAT ", Removed: " UINTX_FORMAT "]\n" 4.163 " [Resize Count: " UINTX_FORMAT ", Shrink Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS "), Grow Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS ")]\n" 4.164 - " [Rehash Count: " UINTX_FORMAT ", Rehash Threshold: " UINTX_FORMAT ", Hash Seed: 0x%x]\n" 4.165 + " [Rehash Count: " UINTX_FORMAT ", Rehash Threshold: " UINTX_FORMAT ", Hash Seed: " UINT64_FORMAT_X "]\n" 4.166 " [Age Threshold: " UINTX_FORMAT "]", 4.167 G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)), 4.168 _table->_size, _min_size, _max_size,
5.1 --- a/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp Sat Sep 12 00:09:03 2020 +0300 5.2 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp Wed Sep 23 15:18:53 2020 +0300 5.3 @@ -38,8 +38,10 @@ 5.4 class G1StringDedupEntry : public CHeapObj<mtGC> { 5.5 private: 5.6 G1StringDedupEntry* _next; 5.7 - unsigned int _hash; 5.8 - typeArrayOop _obj; 5.9 + uint64_t _hash; 5.10 + typeArrayOop _obj; 5.11 + 5.12 + static const uint64_t java_hash_mask = ((uint64_t)1 << 32) - 1; 5.13 5.14 public: 5.15 G1StringDedupEntry() : 5.16 @@ -60,11 +62,19 @@ 5.17 _next = next; 5.18 } 5.19 5.20 - unsigned int hash() { 5.21 + unsigned int java_hash() { 5.22 + return (unsigned int)(_hash & java_hash_mask); 5.23 + } 5.24 + 5.25 + void set_java_hash(unsigned int hash) { 5.26 + _hash = hash; 5.27 + } 5.28 + 5.29 + uint64_t alt_hash() { 5.30 return _hash; 5.31 } 5.32 5.33 - void set_hash(unsigned int hash) { 5.34 + void set_alt_hash(uint64_t hash) { 5.35 _hash = hash; 5.36 } 5.37 5.38 @@ -119,8 +129,8 @@ 5.39 // The hash seed also dictates which hash function to use. A 5.40 // zero hash seed means we will use the Java compatible hash 5.41 // function (which doesn't use a seed), and a non-zero hash 5.42 - // seed means we use the murmur3 hash function. 5.43 - jint _hash_seed; 5.44 + // seed means we use the alternate and better hash function. 5.45 + uint64_t _hash_seed; 5.46 5.47 // Constants governing table resize/rehash/cache. 5.48 static const size_t _min_size; 5.49 @@ -137,7 +147,7 @@ 5.50 static uintx _resize_count; 5.51 static uintx _rehash_count; 5.52 5.53 - G1StringDedupTable(size_t size, jint hash_seed = 0); 5.54 + G1StringDedupTable(size_t size, uint64_t hash_seed = 0); 5.55 ~G1StringDedupTable(); 5.56 5.57 // Returns the hash bucket at the given index. 5.58 @@ -146,12 +156,12 @@ 5.59 } 5.60 5.61 // Returns the hash bucket index for the given hash code. 5.62 - size_t hash_to_index(unsigned int hash) { 5.63 + size_t hash_to_index(uint64_t hash) { 5.64 return (size_t)hash & (_size - 1); 5.65 } 5.66 5.67 // Adds a new table entry to the given hash bucket. 5.68 - void add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list); 5.69 + void add(typeArrayOop value, uint64_t hash, G1StringDedupEntry** list); 5.70 5.71 // Removes the given table entry from the table. 5.72 void remove(G1StringDedupEntry** pentry, uint worker_id); 5.73 @@ -161,15 +171,15 @@ 5.74 5.75 // Returns an existing character array in the given hash bucket, or NULL 5.76 // if no matching character array exists. 5.77 - typeArrayOop lookup(typeArrayOop value, unsigned int hash, 5.78 + typeArrayOop lookup(typeArrayOop value, uint64_t hash, 5.79 G1StringDedupEntry** list, uintx &count); 5.80 5.81 // Returns an existing character array in the table, or inserts a new 5.82 // table entry if no matching character array exists. 5.83 - typeArrayOop lookup_or_add_inner(typeArrayOop value, unsigned int hash); 5.84 + typeArrayOop lookup_or_add_inner(typeArrayOop value, uint64_t hash); 5.85 5.86 // Thread safe lookup or add of table entry 5.87 - static typeArrayOop lookup_or_add(typeArrayOop value, unsigned int hash) { 5.88 + static typeArrayOop lookup_or_add(typeArrayOop value, uint64_t hash) { 5.89 // Protect the table from concurrent access. Also note that this lock 5.90 // acts as a fence for _table, which could have been replaced by a new 5.91 // instance if the table was resized or rehashed. 5.92 @@ -187,7 +197,8 @@ 5.93 5.94 // Computes the hash code for the given character array, using the 5.95 // currently active hash function and hash seed. 5.96 - static unsigned int hash_code(typeArrayOop value); 5.97 + static unsigned int java_hash_code(typeArrayOop value); 5.98 + static uint64_t alt_hash_code(typeArrayOop value); 5.99 5.100 static uintx unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, 5.101 size_t partition_begin,
6.1 --- a/src/share/vm/memory/filemap.cpp Sat Sep 12 00:09:03 2020 +0300 6.2 +++ b/src/share/vm/memory/filemap.cpp Wed Sep 23 15:18:53 2020 +0300 6.3 @@ -125,13 +125,13 @@ 6.4 } else { 6.5 // Get the hash value. Use a static seed because the hash needs to return the same 6.6 // value over multiple jvm invocations. 6.7 - unsigned int hash = AltHashing::murmur3_32(8191, (const jbyte*)vm_version, version_len); 6.8 + uint64_t hash = AltHashing::halfsiphash_64(8191, (const int8_t*)vm_version, version_len); 6.9 6.10 - // Truncate the ident, saving room for the 8 hex character hash value. 6.11 - strncpy(header_version, vm_version, JVM_IDENT_MAX-9); 6.12 + // Truncate the ident, saving room for the 16 hex character hash value. 6.13 + strncpy(header_version, vm_version, JVM_IDENT_MAX-17); 6.14 6.15 - // Append the hash code as eight hex digits. 6.16 - sprintf(&header_version[JVM_IDENT_MAX-9], "%08x", hash); 6.17 + // Append the hash code as 16 hex digits. 6.18 + sprintf(&header_version[JVM_IDENT_MAX-17], "%016" PRIx64, hash); 6.19 header_version[JVM_IDENT_MAX-1] = 0; // Null terminate. 6.20 } 6.21 }
7.1 --- a/src/share/vm/oops/oop.cpp Sat Sep 12 00:09:03 2020 +0300 7.2 +++ b/src/share/vm/oops/oop.cpp Wed Sep 23 15:18:53 2020 +0300 7.3 @@ -111,7 +111,7 @@ 7.4 jchar* chars = java_lang_String::as_unicode_string(this, length, THREAD); 7.5 if (chars != NULL) { 7.6 // Use alternate hashing algorithm on the string 7.7 - return AltHashing::murmur3_32(seed, chars, length); 7.8 + return AltHashing::halfsiphash_64(seed, chars, length); 7.9 } else { 7.10 vm_exit_out_of_memory(length, OOM_MALLOC_ERROR, "unable to create Unicode strings for String table rehash"); 7.11 return 0;
8.1 --- a/src/share/vm/oops/symbol.cpp Sat Sep 12 00:09:03 2020 +0300 8.2 +++ b/src/share/vm/oops/symbol.cpp Wed Sep 23 15:18:53 2020 +0300 8.3 @@ -210,7 +210,7 @@ 8.4 unsigned int Symbol::new_hash(juint seed) { 8.5 ResourceMark rm; 8.6 // Use alternate hashing algorithm on this symbol. 8.7 - return AltHashing::murmur3_32(seed, (const jbyte*)as_C_string(), utf8_length()); 8.8 + return AltHashing::halfsiphash_64(seed, (const int8_t*)as_C_string(), utf8_length()); 8.9 } 8.10 8.11 void Symbol::increment_refcount() {