src/share/vm/classfile/altHashing.cpp

Wed, 23 Sep 2020 15:18:53 +0300

author
vkempik
date
Wed, 23 Sep 2020 15:18:53 +0300
changeset 10008
fd3484fadbe3
parent 9351
bae7d3cdf6af
child 10009
8adf45218add
permissions
-rw-r--r--

8240124: Better VM Interning
Reviewed-by: mbalao, andrew

coleenp@3865 1 /*
vkempik@10008 2 * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
coleenp@3865 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
coleenp@3865 4 *
coleenp@3865 5 * This code is free software; you can redistribute it and/or modify it
coleenp@3865 6 * under the terms of the GNU General Public License version 2 only, as
coleenp@3865 7 * published by the Free Software Foundation.
coleenp@3865 8 *
coleenp@3865 9 * This code is distributed in the hope that it will be useful, but WITHOUT
coleenp@3865 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
coleenp@3865 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
coleenp@3865 12 * version 2 for more details (a copy is included in the LICENSE file that
coleenp@3865 13 * accompanied this code).
coleenp@3865 14 *
coleenp@3865 15 * You should have received a copy of the GNU General Public License version
coleenp@3865 16 * 2 along with this work; if not, write to the Free Software Foundation,
coleenp@3865 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
coleenp@3865 18 *
coleenp@3865 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
coleenp@3865 20 * or visit www.oracle.com if you need additional information or have any
coleenp@3865 21 * questions.
coleenp@3865 22 *
coleenp@3865 23 */
coleenp@3865 24
vkempik@10008 25 /*
vkempik@10008 26 * halfsiphash code adapted from reference implementation
vkempik@10008 27 * (https://github.com/veorq/SipHash/blob/master/halfsiphash.c)
vkempik@10008 28 * which is distributed with the following copyright:
vkempik@10008 29 *
vkempik@10008 30 * SipHash reference C implementation
vkempik@10008 31 *
vkempik@10008 32 * Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
vkempik@10008 33 *
vkempik@10008 34 * To the extent possible under law, the author(s) have dedicated all copyright
vkempik@10008 35 * and related and neighboring rights to this software to the public domain
vkempik@10008 36 * worldwide. This software is distributed without any warranty.
vkempik@10008 37 *
vkempik@10008 38 * You should have received a copy of the CC0 Public Domain Dedication along
vkempik@10008 39 * with this software. If not, see
vkempik@10008 40 * <http://creativecommons.org/publicdomain/zero/1.0/>.
vkempik@10008 41 */
vkempik@10008 42
coleenp@3865 43 #include "precompiled.hpp"
coleenp@3865 44 #include "classfile/altHashing.hpp"
coleenp@3865 45 #include "classfile/systemDictionary.hpp"
coleenp@3865 46 #include "oops/markOop.hpp"
vkempik@10008 47 #include "runtime/os.hpp"
coleenp@3865 48
coleenp@3865 49 // Get the hash code of the classes mirror if it exists, otherwise just
coleenp@3865 50 // return a random number, which is one of the possible hash code used for
coleenp@3865 51 // objects. We don't want to call the synchronizer hash code to install
coleenp@3865 52 // this value because it may safepoint.
coleenp@4037 53 intptr_t object_hash(Klass* k) {
coleenp@3865 54 intptr_t hc = k->java_mirror()->mark()->hash();
coleenp@3865 55 return hc != markOopDesc::no_hash ? hc : os::random();
coleenp@3865 56 }
coleenp@3865 57
coleenp@3865 58 // Seed value used for each alternative hash calculated.
vkempik@10008 59 uint64_t AltHashing::compute_seed() {
vkempik@10008 60 uint64_t nanos = os::javaTimeNanos();
vkempik@10008 61 uint64_t now = os::javaTimeMillis();
vkempik@10008 62 uint32_t SEED_MATERIAL[8] = {
vkempik@10008 63 (uint32_t) object_hash(SystemDictionary::String_klass()),
vkempik@10008 64 (uint32_t) object_hash(SystemDictionary::System_klass()),
vkempik@10008 65 (uint32_t) os::random(), // current thread isn't a java thread
vkempik@10008 66 (uint32_t) (((uint64_t)nanos) >> 32),
vkempik@10008 67 (uint32_t) nanos,
vkempik@10008 68 (uint32_t) (((uint64_t)now) >> 32),
vkempik@10008 69 (uint32_t) now,
vkempik@10008 70 (uint32_t) (os::javaTimeNanos() >> 2)
coleenp@3865 71 };
coleenp@3865 72
vkempik@10008 73 return halfsiphash_64(SEED_MATERIAL, 8);
coleenp@3865 74 }
coleenp@3865 75
vkempik@10008 76 // utility function copied from java/lang/Integer
vkempik@10008 77 static uint32_t Integer_rotateLeft(uint32_t i, int distance) {
vkempik@10008 78 return (i << distance) | (i >> (32 - distance));
vkempik@10008 79 }
coleenp@3865 80
vkempik@10008 81 static void halfsiphash_rounds(uint32_t v[4], int rounds) {
vkempik@10008 82 while (rounds-- > 0) {
vkempik@10008 83 v[0] += v[1];
vkempik@10008 84 v[1] = Integer_rotateLeft(v[1], 5);
vkempik@10008 85 v[1] ^= v[0];
vkempik@10008 86 v[0] = Integer_rotateLeft(v[0], 16);
vkempik@10008 87 v[2] += v[3];
vkempik@10008 88 v[3] = Integer_rotateLeft(v[3], 8);
vkempik@10008 89 v[3] ^= v[2];
vkempik@10008 90 v[0] += v[3];
vkempik@10008 91 v[3] = Integer_rotateLeft(v[3], 7);
vkempik@10008 92 v[3] ^= v[0];
vkempik@10008 93 v[2] += v[1];
vkempik@10008 94 v[1] = Integer_rotateLeft(v[1], 13);
vkempik@10008 95 v[1] ^= v[2];
vkempik@10008 96 v[2] = Integer_rotateLeft(v[2], 16);
vkempik@10008 97 }
vkempik@10008 98 }
vkempik@10008 99
vkempik@10008 100 static void halfsiphash_adddata(uint32_t v[4], uint32_t newdata, int rounds) {
vkempik@10008 101 v[3] ^= newdata;
vkempik@10008 102 halfsiphash_rounds(v, rounds);
vkempik@10008 103 v[0] ^= newdata;
vkempik@10008 104 }
vkempik@10008 105
vkempik@10008 106 static void halfsiphash_init32(uint32_t v[4], uint64_t seed) {
vkempik@10008 107 v[0] = seed & 0xffffffff;
vkempik@10008 108 v[1] = seed >> 32;
vkempik@10008 109 v[2] = 0x6c796765 ^ v[0];
vkempik@10008 110 v[3] = 0x74656462 ^ v[1];
vkempik@10008 111 }
vkempik@10008 112
vkempik@10008 113 static void halfsiphash_init64(uint32_t v[4], uint64_t seed) {
vkempik@10008 114 halfsiphash_init32(v, seed);
vkempik@10008 115 v[1] ^= 0xee;
vkempik@10008 116 }
vkempik@10008 117
vkempik@10008 118 static uint64_t halfsiphash_finish64(uint32_t v[4], int rounds) {
vkempik@10008 119 uint64_t rv;
vkempik@10008 120 v[2] ^= 0xee;
vkempik@10008 121 halfsiphash_rounds(v, rounds);
vkempik@10008 122 rv = v[1] ^ v[3];
vkempik@10008 123 v[1] ^= 0xdd;
vkempik@10008 124 halfsiphash_rounds(v, rounds);
vkempik@10008 125 rv |= (uint64_t)(v[1] ^ v[3]) << 32;
vkempik@10008 126 return rv;
vkempik@10008 127 }
vkempik@10008 128
vkempik@10008 129 // HalfSipHash-2-4 (64-bit output) for Symbols
vkempik@10008 130 uint64_t AltHashing::halfsiphash_64(uint64_t seed, const int8_t* data, int len) {
vkempik@10008 131 uint32_t v[4];
vkempik@10008 132 uint32_t newdata;
vkempik@10008 133 int off = 0;
coleenp@3865 134 int count = len;
coleenp@3865 135
vkempik@10008 136 halfsiphash_init64(v, seed);
coleenp@3865 137 // body
coleenp@3865 138 while (count >= 4) {
vkempik@10008 139 // Avoid sign extension with 0x0ff
vkempik@10008 140 newdata = (data[off] & 0x0FF)
vkempik@10008 141 | (data[off + 1] & 0x0FF) << 8
vkempik@10008 142 | (data[off + 2] & 0x0FF) << 16
vkempik@10008 143 | data[off + 3] << 24;
coleenp@3865 144
coleenp@3865 145 count -= 4;
vkempik@10008 146 off += 4;
coleenp@3865 147
vkempik@10008 148 halfsiphash_adddata(v, newdata, 2);
coleenp@3865 149 }
coleenp@3865 150
coleenp@3865 151 // tail
vkempik@10008 152 newdata = ((uint32_t)len) << 24; // (Byte.SIZE / Byte.SIZE);
coleenp@3865 153
coleenp@3865 154 if (count > 0) {
coleenp@3865 155 switch (count) {
coleenp@3865 156 case 3:
vkempik@10008 157 newdata |= (data[off + 2] & 0x0ff) << 16;
coleenp@3865 158 // fall through
coleenp@3865 159 case 2:
vkempik@10008 160 newdata |= (data[off + 1] & 0x0ff) << 8;
coleenp@3865 161 // fall through
coleenp@3865 162 case 1:
vkempik@10008 163 newdata |= (data[off] & 0x0ff);
coleenp@3865 164 // fall through
coleenp@3865 165 }
coleenp@3865 166 }
coleenp@3865 167
vkempik@10008 168 halfsiphash_adddata(v, newdata, 2);
vkempik@10008 169
coleenp@3865 170 // finalization
vkempik@10008 171 return halfsiphash_finish64(v, 4);
coleenp@3865 172 }
coleenp@3865 173
vkempik@10008 174 // HalfSipHash-2-4 (64-bit output) for Strings
vkempik@10008 175 uint64_t AltHashing::halfsiphash_64(uint64_t seed, const uint16_t* data, int len) {
vkempik@10008 176 uint32_t v[4];
vkempik@10008 177 uint32_t newdata;
coleenp@3865 178 int off = 0;
coleenp@3865 179 int count = len;
coleenp@3865 180
vkempik@10008 181 halfsiphash_init64(v, seed);
vkempik@10008 182
coleenp@3865 183 // body
coleenp@3865 184 while (count >= 2) {
vkempik@10008 185 uint16_t d1 = data[off++] & 0x0FFFF;
vkempik@10008 186 uint16_t d2 = data[off++];
vkempik@10008 187 newdata = (d1 | d2 << 16);
coleenp@3865 188
coleenp@3865 189 count -= 2;
coleenp@3865 190
vkempik@10008 191 halfsiphash_adddata(v, newdata, 2);
coleenp@3865 192 }
coleenp@3865 193
coleenp@3865 194 // tail
vkempik@10008 195 newdata = ((uint32_t)len * 2) << 24; // (Character.SIZE / Byte.SIZE);
coleenp@3865 196 if (count > 0) {
vkempik@10008 197 newdata |= (uint32_t)data[off];
coleenp@3865 198 }
vkempik@10008 199 halfsiphash_adddata(v, newdata, 2);
coleenp@3865 200
coleenp@3865 201 // finalization
vkempik@10008 202 return halfsiphash_finish64(v, 4);
coleenp@3865 203 }
coleenp@3865 204
vkempik@10008 205 // HalfSipHash-2-4 (64-bit output) for integers (used to create seed)
vkempik@10008 206 uint64_t AltHashing::halfsiphash_64(uint64_t seed, const uint32_t* data, int len) {
vkempik@10008 207 uint32_t v[4];
coleenp@3865 208
coleenp@3865 209 int off = 0;
coleenp@3865 210 int end = len;
coleenp@3865 211
vkempik@10008 212 halfsiphash_init64(v, seed);
vkempik@10008 213
coleenp@3865 214 // body
coleenp@3865 215 while (off < end) {
vkempik@10008 216 halfsiphash_adddata(v, (uint32_t)data[off++], 2);
coleenp@3865 217 }
coleenp@3865 218
coleenp@3865 219 // tail (always empty, as body is always 32-bit chunks)
coleenp@3865 220
coleenp@3865 221 // finalization
vkempik@10008 222 halfsiphash_adddata(v, ((uint32_t)len * 4) << 24, 2); // (Integer.SIZE / Byte.SIZE);
vkempik@10008 223 return halfsiphash_finish64(v, 4);
coleenp@3865 224 }
coleenp@3865 225
vkempik@10008 226 // HalfSipHash-2-4 (64-bit output) for integers (used to create seed)
vkempik@10008 227 uint64_t AltHashing::halfsiphash_64(const uint32_t* data, int len) {
vkempik@10008 228 return halfsiphash_64((uint64_t)0, data, len);
coleenp@3865 229 }
coleenp@3865 230
coleenp@3865 231 #ifndef PRODUCT
vkempik@10008 232 void AltHashing::testHalfsiphash_64_ByteArray() {
vkempik@10008 233 // printf("testHalfsiphash_64_CharArray\n");
vkempik@10008 234 const int factor = 4;
coleenp@3865 235
vkempik@10008 236 int8_t vector[256];
vkempik@10008 237 int8_t hashes[factor * 256];
coleenp@3865 238
vkempik@10008 239 for (int i = 0; i < 256; i++) {
vkempik@10008 240 vector[i] = (int8_t) i;
vkempik@10008 241 }
coleenp@3865 242
vkempik@10008 243 // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255}
vkempik@10008 244 for (int i = 0; i < 256; i++) {
vkempik@10008 245 uint64_t hash = AltHashing::halfsiphash_64(256 - i, vector, i);
vkempik@10008 246 hashes[i * factor] = (int8_t) hash;
vkempik@10008 247 hashes[i * factor + 1] = (int8_t)(hash >> 8);
vkempik@10008 248 hashes[i * factor + 2] = (int8_t)(hash >> 16);
vkempik@10008 249 hashes[i * factor + 3] = (int8_t)(hash >> 24);
vkempik@10008 250 }
coleenp@3865 251
vkempik@10008 252 // hash to get const result.
vkempik@10008 253 uint64_t final_hash = AltHashing::halfsiphash_64(0, hashes, factor*256);
coleenp@3865 254
vkempik@10008 255 // Value found using reference implementation for the hashes array.
vkempik@10008 256 // halfsiphash((const uint8_t*)hashes, factor*256, (const uint8_t *)&k,
vkempik@10008 257 // (uint8_t*)&reference, 8);
coleenp@3865 258
vkempik@10008 259 static const uint64_t HALFSIPHASH_64_BYTE_CHECK_VALUE = 0x15a7911e30917ee8;
coleenp@3865 260
vkempik@10008 261 assert (HALFSIPHASH_64_BYTE_CHECK_VALUE == final_hash,
vkempik@10008 262 err_msg(
vkempik@10008 263 "Calculated hash result not as expected. Expected " UINT64_FORMAT " got " UINT64_FORMAT,
vkempik@10008 264 HALFSIPHASH_64_BYTE_CHECK_VALUE,
vkempik@10008 265 final_hash));
coleenp@3865 266 }
coleenp@3865 267
vkempik@10008 268 void AltHashing::testHalfsiphash_64_CharArray() {
vkempik@10008 269 // printf("testHalfsiphash_64_CharArray\n");
vkempik@10008 270 const int factor = 2;
vkempik@10008 271
vkempik@10008 272 uint16_t vector[256];
vkempik@10008 273 uint16_t hashes[factor * 256];
vkempik@10008 274
vkempik@10008 275 for (int i = 0; i < 256; i++) {
vkempik@10008 276 vector[i] = (uint16_t) i;
vkempik@10008 277 }
vkempik@10008 278
vkempik@10008 279 // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255}
vkempik@10008 280 for (int i = 0; i < 256; i++) {
vkempik@10008 281 uint64_t hash = AltHashing::halfsiphash_64(256 - i, vector, i);
vkempik@10008 282 hashes[i * factor] = (uint16_t) hash;
vkempik@10008 283 hashes[i * factor + 1] = (uint16_t)(hash >> 16);
vkempik@10008 284 }
vkempik@10008 285
vkempik@10008 286 // hash to get const result.
vkempik@10008 287 uint64_t final_hash = AltHashing::halfsiphash_64(0, hashes, factor*256);
vkempik@10008 288
vkempik@10008 289 // Value found using reference implementation for the hashes array.
vkempik@10008 290 // halfsiphash((const uint8_t*)hashes, 2*factor*256, (const uint8_t *)&k,
vkempik@10008 291 // (uint8_t*)&reference, 8);
vkempik@10008 292 static const uint64_t HALFSIPHASH_64_CHAR_CHECK_VALUE = 0xf392d8a6a9e24103;
vkempik@10008 293
vkempik@10008 294 assert(HALFSIPHASH_64_CHAR_CHECK_VALUE == final_hash,
vkempik@10008 295 err_msg(
vkempik@10008 296 "Calculated hash result not as expected. Expected " UINT64_FORMAT " got " UINT64_FORMAT,
vkempik@10008 297 HALFSIPHASH_64_CHAR_CHECK_VALUE,
vkempik@10008 298 final_hash));
coleenp@3865 299 }
coleenp@3865 300
vkempik@10008 301 // Test against sample hashes published with the reference implementation:
vkempik@10008 302 // https://github.com/veorq/SipHash
vkempik@10008 303 void AltHashing::testHalfsiphash_64_FromReference() {
vkempik@10008 304 // printf("testHalfsiphash_64_FromReference\n");
coleenp@3865 305
vkempik@10008 306 const uint64_t seed = 0x0706050403020100;
vkempik@10008 307 const uint64_t results[16] = {
vkempik@10008 308 0xc83cb8b9591f8d21, 0xa12ee55b178ae7d5,
vkempik@10008 309 0x8c85e4bc20e8feed, 0x99c7f5ae9f1fc77b,
vkempik@10008 310 0xb5f37b5fd2aa3673, 0xdba7ee6f0a2bf51b,
vkempik@10008 311 0xf1a63fae45107470, 0xb516001efb5f922d,
vkempik@10008 312 0x6c6211d8469d7028, 0xdc7642ec407ad686,
vkempik@10008 313 0x4caec8671cc8385b, 0x5ab1dc27adf3301e,
vkempik@10008 314 0x3e3ea94bc0a8eaa9, 0xe150f598795a4402,
vkempik@10008 315 0x1d5ff142f992a4a1, 0x60e426bf902876d6
vkempik@10008 316 };
vkempik@10008 317 uint32_t vector[16];
coleenp@3865 318
vkempik@10008 319 for (int i = 0; i < 16; i++)
vkempik@10008 320 vector[i] = 0x03020100 + i * 0x04040404;
coleenp@3865 321
vkempik@10008 322 for (int i = 0; i < 16; i++) {
vkempik@10008 323 uint64_t hash = AltHashing::halfsiphash_64(seed, vector, i);
vkempik@10008 324 assert(results[i] == hash,
vkempik@10008 325 err_msg(
vkempik@10008 326 "Calculated hash result not as expected. Round %d: "
vkempik@10008 327 "Expected " UINT64_FORMAT_X " got " UINT64_FORMAT_X "\n",
vkempik@10008 328 i,
vkempik@10008 329 results[i],
vkempik@10008 330 hash));
vkempik@10008 331 }
vkempik@10008 332 }
coleenp@3865 333
coleenp@3865 334 void AltHashing::test_alt_hash() {
vkempik@10008 335 testHalfsiphash_64_ByteArray();
vkempik@10008 336 testHalfsiphash_64_CharArray();
vkempik@10008 337 testHalfsiphash_64_FromReference();
coleenp@3865 338 }
coleenp@3865 339 #endif // PRODUCT

mercurial