Next version (#596)

* cleanup prepare script

* native image options

* fix quadratic probing (no change to perf)

* mask to get the last chunk of the name

* extract hash functions

* tweak the probing loop (-100ms)

* fiddle with native image options

* Reorder conditions in the hope that it makes the branch predictor happier

* extracted constant
This commit is contained in:
Roman Musin 2024-01-27 14:17:55 +00:00 committed by GitHub
parent c228633b57
commit f9c58414da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 60 additions and 43 deletions

View File

@ -15,21 +15,16 @@
# limitations under the License. # limitations under the License.
# #
JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
# epsilon GC needs enough memory or it makes things worse
# see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"
if [ -f target/CalculateAverage_roman_r_m_image ]; then if [ -f target/CalculateAverage_roman_r_m_image ]; then
echo "Picking up existing native image 'target/CalculateAverage_roman_r_m_image', delete the file to select JVM mode." 1>&2 echo "Running native image 'target/CalculateAverage_roman_r_m_image'." 1>&2
target/CalculateAverage_roman_r_m_image target/CalculateAverage_roman_r_m_image
else else
JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA" JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
if [[ ! "$(uname -s)" = "Darwin" ]]; then JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA"
# On OS/X, my machine, this errors: # epsilon GC needs enough memory or it makes things worse
JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" # see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
fi JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"
echo "Choosing to run the app in JVM mode as no native image was found, use additional_build_step_roman_r_m.sh to generate." 1>&2
echo "Running on JVM" 1>&2
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m
fi fi

View File

@ -23,6 +23,8 @@ if [ ! -f target/CalculateAverage_roman_r_m_image ]; then
JAVA_OPTS="--enable-preview -dsa" JAVA_OPTS="--enable-preview -dsa"
NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_roman_r_m --gc=epsilon -Ob -O3 -march=native --strict-image-heap $JAVA_OPTS" NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_roman_r_m --gc=epsilon -Ob -O3 -march=native --strict-image-heap $JAVA_OPTS"
NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -R:MaxHeapSize=128m"
NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -H:+UnlockExperimentalVMOptions -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions"
native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_roman_r_m_image dev.morling.onebrc.CalculateAverage_roman_r_m native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_roman_r_m_image dev.morling.onebrc.CalculateAverage_roman_r_m
fi fi

View File

@ -40,6 +40,7 @@ public class CalculateAverage_roman_r_m {
private static final long SEMICOLON_MASK = broadcast((byte) ';'); private static final long SEMICOLON_MASK = broadcast((byte) ';');
private static final long LINE_END_MASK = broadcast((byte) '\n'); private static final long LINE_END_MASK = broadcast((byte) '\n');
private static final long DOT_MASK = broadcast((byte) '.'); private static final long DOT_MASK = broadcast((byte) '.');
private static final long ZEROES_MASK = broadcast((byte) '0');
// from netty // from netty
@ -64,6 +65,15 @@ public class CalculateAverage_roman_r_m {
return start + Long.numberOfTrailingZeros(i) / 8; return start + Long.numberOfTrailingZeros(i) / 8;
} }
/**
 * Folds a full 64-bit word into a 32-bit hash by XOR-ing its upper and
 * lower 32-bit halves.
 *
 * @param word the 8 bytes to hash, packed into a {@code long}
 * @return the XOR of the high and low halves of {@code word}
 */
static int hashFull(long word) {
    // Long.hashCode(long) is specified as (int) (value ^ (value >>> 32)).
    return Long.hashCode(word);
}
/**
 * Hashes only the {@code bytes} lowest-order bytes of {@code word}.
 *
 * <p>The word is byte-reversed so that those bytes end up at the top, then
 * shifted right so that everything else falls off; the surviving value is
 * folded to 32 bits by XOR-ing its upper and lower halves.
 *
 * <p>Note: for {@code bytes == 0} the shift distance is 64, which Java
 * reduces to 0 for {@code long} shifts, so nothing is discarded in that case.
 *
 * @param word  the 8 bytes containing the data to hash
 * @param bytes how many of the lowest-order bytes of {@code word} take part
 *              in the hash
 * @return a 32-bit hash of the selected bytes
 */
static int hashPartial(long word, int bytes) {
    final int discardedBits = Long.SIZE - Byte.SIZE * bytes;
    final long kept = Long.reverseBytes(word) >>> discardedBits;
    final long upperHalf = kept >>> Integer.SIZE;
    return (int) (kept ^ upperHalf);
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
Field f = Unsafe.class.getDeclaredField("theUnsafe"); Field f = Unsafe.class.getDeclaredField("theUnsafe");
f.setAccessible(true); f.setAccessible(true);
@ -96,34 +106,37 @@ public class CalculateAverage_roman_r_m {
var station = new ByteString(segment); var station = new ByteString(segment);
long offset = segment.address(); long offset = segment.address();
long end = offset + segment.byteSize(); long end = offset + segment.byteSize();
long tailMask;
while (offset < end) { while (offset < end) {
// parsing station name // parsing station name
long start = offset; long start = offset;
long next = UNSAFE.getLong(offset); long next = UNSAFE.getLong(offset);
long pattern = applyPattern(next, SEMICOLON_MASK); long pattern = applyPattern(next, SEMICOLON_MASK);
int bytes; int bytes;
if (pattern != 0) { if (pattern == 0) {
bytes = Long.numberOfTrailingZeros(pattern) / 8; station.hash = hashFull(next);
offset += bytes; do {
long h = Long.reverseBytes(next) >>> (8 * (8 - bytes));
station.hash = (int) (h ^ (h >>> 32));
}
else {
long h = next;
station.hash = (int) (h ^ (h >>> 32));
while (pattern == 0) {
offset += 8; offset += 8;
next = UNSAFE.getLong(offset); next = UNSAFE.getLong(offset);
pattern = applyPattern(next, SEMICOLON_MASK); pattern = applyPattern(next, SEMICOLON_MASK);
} } while (pattern == 0);
bytes = Long.numberOfTrailingZeros(pattern) / 8; bytes = Long.numberOfTrailingZeros(pattern) / 8;
offset += bytes; offset += bytes;
tailMask = ((1L << (8 * bytes)) - 1);
}
else {
bytes = Long.numberOfTrailingZeros(pattern) / 8;
offset += bytes;
tailMask = ((1L << (8 * bytes)) - 1);
station.hash = hashPartial(next, bytes);
} }
int len = (int) (offset - start); int len = (int) (offset - start);
station.offset = start; station.offset = start;
station.len = len; station.len = len;
station.tail = next & ((1L << (8 * bytes)) - 1); station.tail = next & tailMask;
offset++; offset++;
@ -140,7 +153,7 @@ public class CalculateAverage_roman_r_m {
long numLen = applyPattern(encodedVal, DOT_MASK); long numLen = applyPattern(encodedVal, DOT_MASK);
numLen = Long.numberOfTrailingZeros(numLen) / 8; numLen = Long.numberOfTrailingZeros(numLen) / 8;
encodedVal ^= broadcast((byte) 0x30); encodedVal ^= ZEROES_MASK;
int intPart = (int) (encodedVal & ((1 << (8 * numLen)) - 1)); int intPart = (int) (encodedVal & ((1 << (8 * numLen)) - 1));
intPart <<= 8 * (2 - numLen); intPart <<= 8 * (2 - numLen);
@ -285,24 +298,31 @@ public class CalculateAverage_roman_r_m {
int h = s.hashCode(); int h = s.hashCode();
int idx = (SIZE - 1) & h; int idx = (SIZE - 1) & h;
var keys = this.keys;
int idx0 = idx;
int i = 0; int i = 0;
while (keys[idx] != null && !keys[idx].equals(s)) { while (true) {
i++; if (keys[idx] != null && keys[idx].equals(s)) {
idx = (idx + i * i) % SIZE; values[idx][0] = Math.min(values[idx][0], value);
} values[idx][1] = Math.max(values[idx][1], value);
if (keys[idx] == null) { values[idx][2] += value;
keys[idx] = s.copy(); values[idx][3] += 1;
values[idx] = new int[4]; return;
values[idx][0] = value; }
values[idx][1] = value; else if (keys[idx] == null) {
values[idx][2] = value; keys[idx] = s.copy();
values[idx][3] = 1; values[idx] = new int[4];
} values[idx][0] = value;
else { values[idx][1] = value;
values[idx][0] = Math.min(values[idx][0], value); values[idx][2] = value;
values[idx][1] = Math.max(values[idx][1], value); values[idx][3] = 1;
values[idx][2] += value; return;
values[idx][3] += 1; }
else {
i++;
idx = (idx0 + i * i) % SIZE;
}
} }
} }