Epsilon GC + a number of other small tweaks (#513)

* Version 3

* Use SWAR algorithm from netty for finding a symbol in a string

* Faster equals - store the remainder in a long field (- 0.5s)

* optimise parsing numbers - prep

* Keep tweaking parsing logic

* Rewrote number parsing

may be a tiny bit faster, if at all

* Epsilon GC
This commit is contained in:
Roman Musin 2024-01-20 19:30:25 +00:00 committed by GitHub
parent 062f2bbecf
commit 9100ed6316
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 59 additions and 72 deletions

View File

@ -16,4 +16,10 @@
#
JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
# Epsilon GC needs enough memory or it makes things worse, see
# https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
# 2GB seems to be the sweet spot, so -Xms and -Xmx are both set to 2G.
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx2G -Xms2G -XX:+AlwaysPreTouch"
# $JAVA_OPTS must stay unquoted here: the shell has to split it into separate JVM arguments.
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m

View File

@ -33,37 +33,35 @@ public class CalculateAverage_roman_r_m {
private static Unsafe UNSAFE;
// based on http://0x80.pl/notesen/2023-03-06-swar-find-any.html
/**
 * SWAR zero-byte detector: inspects all eight bytes of {@code l} at once.
 *
 * @param l the 64-bit word to inspect
 * @return 0 when no byte of {@code l} is zero; otherwise a word with 0x80 markers in which
 *         the lowest marked byte is the first zero byte (bytes above it may be marked
 *         spuriously because the subtraction borrows across byte boundaries)
 */
static long hasZeroByte(long l) {
    final long lowBitOfEachByte = 0x0101010101010101L;
    final long highBitOfEachByte = 0x8080808080808080L;
    // Subtracting 1 from every byte flips the top bit of a byte that was zero.
    long decremented = l - lowBitOfEachByte;
    // Discard bytes whose top bit was already set in the input, then keep top bits only.
    long candidates = decremented & ~l;
    return candidates & highBitOfEachByte;
}
/**
 * Converts a marker word into the index of its lowest marked byte.
 *
 * The arithmetic yields the byte index when the lowest set bit of {@code l} is the top bit
 * (0x80) of a byte, which is the marker shape {@code hasZeroByte} returns.
 *
 * @param l marker word; for 0 the expression evaluates to 7, so callers are expected to
 *          test for "no match" before calling
 * @return zero-based index, counted from the least significant byte, of the lowest marked byte
 */
static long firstSetByteIndex(long l) {
    final long lowBitOfEachByte = 0x101010101010101L;
    // l - 1 turns every bit below the lowest set bit into 1; keep one bit per covered byte.
    long coveredBytes = (l - 1) & lowBitOfEachByte;
    // The multiplication adds all per-byte bits together into the most significant byte.
    long coveredCount = (coveredBytes * lowBitOfEachByte) >> 56;
    return coveredCount - 1;
}
static long broadcast(byte b) {
private static long broadcast(byte b) {
return 0x101010101010101L * b;
}
static long SEMICOLON_MASK = broadcast((byte) ';');
static long LINE_END_MASK = broadcast((byte) '\n');
private static final long SEMICOLON_MASK = broadcast((byte) ';');
private static final long LINE_END_MASK = broadcast((byte) '\n');
private static final long DOT_MASK = broadcast((byte) '.');
static long find(long l, long mask) {
long xor = l ^ mask;
long match = hasZeroByte(xor);
return match != 0 ? firstSetByteIndex(match) : -1;
// from netty
/**
 * Applies a compiled pattern to given word.
 * Returns a word where each byte that matches the pattern has the highest bit set.
 * All other bits of the result are zero, so a result of 0 means no byte matched.
 *
 * @param word    eight input bytes packed into a long
 * @param pattern the word to compare against byte by byte; callers in this file pass masks
 *                built with broadcast(), i.e. one byte value repeated in all eight positions
 * @return a word with 0x80 in every byte position where word and pattern are equal
 */
private static long applyPattern(final long word, final long pattern) {
// XOR leaves a zero byte exactly where word and pattern agree.
long input = word ^ pattern;
// Per byte: low 7 bits + 0x7F sets the top bit iff any low bit was set; the sum is at most 0xFE, so nothing carries into the next byte.
long tmp = (input & 0x7F7F7F7F7F7F7F7FL) + 0x7F7F7F7F7F7F7F7FL;
// A byte's top bit survives the negation only if neither its low 7 bits nor its own top bit were set, i.e. the XOR byte was zero.
return ~(tmp | input | 0x7F7F7F7F7F7F7F7FL);
}
static long nextNewline(long from, MemorySegment ms) {
long start = from;
long i;
long next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start);
while ((i = find(next, LINE_END_MASK)) < 0) {
while ((i = applyPattern(next, LINE_END_MASK)) == 0) {
start += 8;
next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start);
}
return start + i;
return start + Long.numberOfTrailingZeros(i) / 8;
}
static class Worker {
@ -84,55 +82,53 @@ public class CalculateAverage_roman_r_m {
private void parseName(ByteString station) {
long start = offset;
long pos = -1;
while (end - offset > 8) {
long pattern;
long next = UNSAFE.getLong(offset);
pos = find(next, SEMICOLON_MASK);
if (pos >= 0) {
offset += pos;
break;
}
else {
while ((pattern = applyPattern(next, SEMICOLON_MASK)) == 0) {
offset += 8;
next = UNSAFE.getLong(offset);
}
}
if (pos < 0) {
while (UNSAFE.getByte(offset++) != ';') {
}
offset--;
}
int bytes = Long.numberOfTrailingZeros(pattern) / 8;
offset += bytes;
int len = (int) (offset - start);
station.offset = start;
station.len = len;
station.hash = 0;
station.tail = next & ((1L << (8 * bytes)) - 1);
offset++;
}
long parseNumberFast() {
int parseNumberFast() {
long encodedVal = UNSAFE.getLong(offset);
var len = find(encodedVal, LINE_END_MASK);
offset += len + 1;
int neg = 1 - Integer.bitCount((int) (encodedVal & 0x10));
encodedVal >>>= 8 * neg;
var len = applyPattern(encodedVal, DOT_MASK);
len = Long.numberOfTrailingZeros(len) / 8;
encodedVal ^= broadcast((byte) 0x30);
long c0 = len == 4 ? 100 : 10;
long c1 = 10 * (len - 3);
long c2 = 4 - len;
long c3 = len - 3;
long a = (encodedVal & 0xFF) * c0;
long b = ((encodedVal & 0xFF00) >>> 8) * c1;
long c = ((encodedVal & 0xFF0000L) >>> 16) * c2;
long d = ((encodedVal & 0xFF000000L) >>> 24) * c3;
int intPart = (int) (encodedVal & ((1 << (8 * len)) - 1));
intPart <<= 8 * (2 - len);
intPart *= (100 * 256 + 10);
intPart = (intPart & 0x3FF80) >>> 8;
return a + b + c + d;
int frac = (int) ((encodedVal >>> (8 * (len + 1))) & 0xFF);
offset += neg + len + 3; // 1 for . + 1 for fractional part + 1 for new line char
int sign = 1 - 2 * neg;
int val = intPart + frac;
return sign * val;
}
long parseNumberSlow() {
long val = UNSAFE.getByte(offset++) - '0';
int parseNumberSlow() {
int neg = 1 - Integer.bitCount(UNSAFE.getByte(offset) & 0x10);
offset += neg;
int val = UNSAFE.getByte(offset++) - '0';
byte b;
while ((b = UNSAFE.getByte(offset++)) != '.') {
val = val * 10 + (b - '0');
@ -140,24 +136,19 @@ public class CalculateAverage_roman_r_m {
b = UNSAFE.getByte(offset);
val = val * 10 + (b - '0');
offset += 2;
return val;
}
long parseNumber() {
long val;
int neg = 1 - Integer.bitCount(UNSAFE.getByte(offset) & 0x10);
offset += neg;
if (end - offset > 8) {
val = parseNumberFast();
}
else {
val = parseNumberSlow();
}
val *= 1 - 2 * neg;
return val;
}
int parseNumber() {
if (end - offset >= 8) {
return parseNumberFast();
}
else {
return parseNumberSlow();
}
}
public TreeMap<String, ResultRow> run() {
var resultStore = new ResultStore();
var station = new ByteString(ms);
@ -218,6 +209,7 @@ public class CalculateAverage_roman_r_m {
private long offset;
private int len = 0;
private int hash = 0;
private long tail = 0L;
ByteString(MemorySegment ms) {
this.ms = ms;
@ -235,6 +227,7 @@ public class CalculateAverage_roman_r_m {
copy.offset = this.offset;
copy.len = this.len;
copy.hash = this.hash;
copy.tail = this.tail;
return copy;
}
@ -259,19 +252,7 @@ public class CalculateAverage_roman_r_m {
return false;
}
}
if (len >= 8) {
long l1 = UNSAFE.getLong(offset + len - 8);
long l2 = UNSAFE.getLong(that.offset + len - 8);
return l1 == l2;
}
for (; i < len; i++) {
byte i1 = UNSAFE.getByte(offset + i);
byte i2 = UNSAFE.getByte(that.offset + i);
if (i1 != i2) {
return false;
}
}
return true;
return this.tail == that.tail;
}
@Override