From 2a44f8d390af7a4d4e848e0ae792b0e1e6ae7925 Mon Sep 17 00:00:00 2001 From: Jaime Polidura <73758994+JaimePolidura@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:22:22 +0100 Subject: [PATCH] Added improvements on string copying, string comparison & calculation of next index in case of collision in custom map (#650) * added code * Fixed pointer bugs * removed my own benchmark * added comment on how I handle hash collisions * executed mvn clean verify * made scripts executable & fixed rounding issues * Fixed way of dealing with hash collisions * changed method name sameNameBytes to isSameNameBytes * changed script from sh to bash * fixed chunking bug * Fixed bug in chunking when file size is too small * added Runtime.getRuntime().availableProcessors * added improvements on string copying, calculation of next index of Map in case of collision & improved string comparing --- .../CalculateAverage_JaimePolidura.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java b/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java index 3980a2c..bc9070c 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_JaimePolidura.java @@ -309,16 +309,14 @@ public final class CalculateAverage_JaimePolidura { } public void put(long hashToPut, byte[] nameToPut, int nameLength, int valueToPut) { - int index = hashToIndex(hashToPut); + int index = toIndex(hashToPut); for (;;) { Result actualEntry = entries[index]; if (actualEntry == null) { byte[] nameToPutCopy = new byte[nameLength]; - for (int i = 0; i < nameLength; i++) { - nameToPutCopy[i] = nameToPut[i]; - } + UNSAFE.copyMemory(nameToPut, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameToPutCopy, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength); entries[index] = new Result(hashToPut, nameToPutCopy, nameLength, valueToPut, valueToPut, valueToPut, 1); 
@@ -331,14 +329,12 @@ public final class CalculateAverage_JaimePolidura { actualEntry.sum = actualEntry.sum + valueToPut; return; } - // If the name is not the same, we try to go to the next slot - if (++index >= this.size) { - index = 0; - } + + index = toIndex(index + 31); } } - private int hashToIndex(long hash) { + private int toIndex(long hash) { return (int) (((hash >> 32) ^ ((int) hash)) & (this.size - 1)); } } @@ -367,8 +363,15 @@ public final class CalculateAverage_JaimePolidura { } private boolean isSameNameBytes(byte[] otherNameBytes) { - for (int i = 0; i < this.nameLength; i++) { - if (this.name[i] != otherNameBytes[i]) { + for (int i = 0; i < this.nameLength; i += 8) { + long thisNameBytesAsLong = UNSAFE.getLong(this.name, Unsafe.ARRAY_BYTE_BASE_OFFSET + i); + long otherNameBytesAsLong = UNSAFE.getLong(otherNameBytes, Unsafe.ARRAY_BYTE_BASE_OFFSET + i); + + int isPositiveAsInt = (((8 - nameLength + i) >> 31) & 1) ^ 0x01; + int shift = ((8 - nameLength + i) * isPositiveAsInt) * 8; + otherNameBytesAsLong = (otherNameBytesAsLong << shift) >>> shift; + + if (thisNameBytesAsLong != otherNameBytesAsLong) { return false; } }