From 249bdc4b811728e0db3f9e715f8c0e816faf55b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pap=20L=C5=91rinc?= Date: Wed, 30 Nov 2016 11:37:08 +0200 Subject: [PATCH 1/2] Optimized HashTable.index [2.12.x backport] --- .../scala/collection/mutable/HashTable.scala | 83 ++++++------------- 1 file changed, 24 insertions(+), 59 deletions(-) diff --git a/src/library/scala/collection/mutable/HashTable.scala b/src/library/scala/collection/mutable/HashTable.scala index b48a32fa3749..7b3567b9f56c 100644 --- a/src/library/scala/collection/mutable/HashTable.scala +++ b/src/library/scala/collection/mutable/HashTable.scala @@ -357,14 +357,14 @@ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashU protected def elemEquals(key1: A, key2: A): Boolean = (key1 == key2) - // Note: - // we take the most significant bits of the hashcode, not the lower ones - // this is of crucial importance when populating the table in parallel - protected final def index(hcode: Int) = { + /** + * Note: we take the most significant bits of the hashcode, not the lower ones + * this is of crucial importance when populating the table in parallel + */ + protected final def index(hcode: Int): Int = { val ones = table.length - 1 - val improved = improve(hcode, seedvalue) - val shifted = (improved >> (32 - java.lang.Integer.bitCount(ones))) & ones - shifted + val exponent = Integer.numberOfLeadingZeros(ones) + (improve(hcode, seedvalue) >>> exponent) & ones } protected def initWithContents(c: HashTable.Contents[A, Entry]) = { @@ -408,58 +408,23 @@ private[collection] object HashTable { protected def elemHashCode(key: KeyType) = key.## - protected final def improve(hcode: Int, seed: Int) = { - /* Murmur hash - * m = 0x5bd1e995 - * r = 24 - * note: h = seed = 0 in mmix - * mmix(h,k) = k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; */ - // var k = hcode * 0x5bd1e995 - // k ^= k >> 24 - // k *= 0x5bd1e995 - // k - - /* Another fast multiplicative hash - * by Phil Bagwell - * - * Comment: - * Multiplication doesn't affect all the bits in the same way, so we want to - * multiply twice, "once from each side". - * It would be ideal to reverse all the bits after the first multiplication, - * however, this is more costly. We therefore restrict ourselves only to - * reversing the bytes before final multiplication. This yields a slightly - * worse entropy in the lower 8 bits, but that can be improved by adding: - * - * `i ^= i >> 6` - * - * For performance reasons, we avoid this improvement. - * */ - val i= scala.util.hashing.byteswap32(hcode) - - /* Jenkins hash - * for range 0-10000, output has the msb set to zero */ - // var h = hcode + (hcode << 12) - // h ^= (h >> 22) - // h += (h << 4) - // h ^= (h >> 9) - // h += (h << 10) - // h ^= (h >> 2) - // h += (h << 7) - // h ^= (h >> 12) - // h - - /* OLD VERSION - * quick, but bad for sequence 0-10000 - little entropy in higher bits - * since 2003 */ - // var h: Int = hcode + ~(hcode << 9) - // h = h ^ (h >>> 14) - // h = h + (h << 4) - // h ^ (h >>> 10) - - // the rest of the computation is due to SI-5293 - val rotation = seed % 32 - val rotated = (i >>> rotation) | (i << (32 - rotation)) - rotated + /** + * Defer to a high-quality hash in [[scala.util.hashing]]. + * The goal is to distribute across bins as well as possible even if a hash code has low entropy at some bits. + *

+ * OLD VERSION - quick, but bad for sequence 0-10000 - little entropy in higher bits - since 2003 + * {{{ + * var h: Int = hcode + ~(hcode << 9) + * h = h ^ (h >>> 14) + * h = h + (h << 4) + * h ^ (h >>> 10) + * }}} + * the rest of the computation is due to SI-5293 + */ + protected final def improve(hcode: Int, seed: Int): Int = { + val hash = scala.util.hashing.byteswap32(hcode) + val shift = seed & ((1 << 5) - 1) + (hash >>> shift) | (hash << (32 - shift)) } } From e3f41e5b4c762a0a0065fde4a9455430a88b1566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pap=20L=C5=91rinc?= Date: Wed, 30 Nov 2016 11:41:53 +0200 Subject: [PATCH 2/2] Changed HashMap.getOrElseUpdate to only calculate the index once [2.12.x backport] --- .../scala/collection/mutable/HashMap.scala | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/library/scala/collection/mutable/HashMap.scala b/src/library/scala/collection/mutable/HashMap.scala index 6fca75ffead2..5f91a338b43f 100644 --- a/src/library/scala/collection/mutable/HashMap.scala +++ b/src/library/scala/collection/mutable/HashMap.scala @@ -72,6 +72,37 @@ extends AbstractMap[A, B] else Some(e.value) } + override def getOrElseUpdate(key: A, defaultValue: => B): B = { + val i = index(elemHashCode(key)) + val entry = findEntry(key, i) + if (entry != null) entry.value + else addEntry(createNewEntry(key, defaultValue), i) + } + + /* inlined HashTable.findEntry0 to preserve its visibility */ + private[this] def findEntry(key: A, h: Int): Entry = { + var e = table(h).asInstanceOf[Entry] + while (notFound(key, e)) + e = e.next + e + } + private[this] def notFound(key: A, e: Entry): Boolean = (e != null) && !elemEquals(e.key, key) + + /* inlined HashTable.addEntry0 to preserve its visibility */ + private[this] def addEntry(e: Entry, h: Int): B = { + if (tableSize >= threshold) addEntry(e) + else addEntry0(e, h) + e.value + } + + /* extracted to make addEntry inlinable */ + private[this] def addEntry0(e: Entry, h: Int) { + e.next = table(h).asInstanceOf[Entry] + table(h) = e + tableSize += 1 + nnSizeMapAdd(h) + } + override def put(key: A, value: B): Option[B] = { val e = findOrAddEntry(key, value) if (e eq null) None