Next version (#596)
* cleanup prepare script * native image options * fix quadratic probing (no change to perf) * mask to get the last chunk of the name * extract hash functions * tweak the probing loop (-100ms) * fiddle with native image options * Reorder conditions in hope it makes branch predictor happier * extracted constant
This commit is contained in:
@@ -15,21 +15,16 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
|
|
||||||
|
|
||||||
# epsilon GC needs enough memory or it makes things worse
|
|
||||||
# see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
|
|
||||||
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"
|
|
||||||
|
|
||||||
if [ -f target/CalculateAverage_roman_r_m_image ]; then
|
if [ -f target/CalculateAverage_roman_r_m_image ]; then
|
||||||
echo "Picking up existing native image 'target/CalculateAverage_roman_r_m_image', delete the file to select JVM mode." 1>&2
|
echo "Running native image 'target/CalculateAverage_roman_r_m_image'." 1>&2
|
||||||
target/CalculateAverage_roman_r_m_image
|
target/CalculateAverage_roman_r_m_image
|
||||||
else
|
else
|
||||||
JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA"
|
JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
|
||||||
if [[ ! "$(uname -s)" = "Darwin" ]]; then
|
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA"
|
||||||
# On OS/X, my machine, this errors:
|
# epsilon GC needs enough memory or it makes things worse
|
||||||
JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages"
|
# see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
|
||||||
fi
|
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"
|
||||||
echo "Choosing to run the app in JVM mode as no native image was found, use additional_build_step_roman_r_m.sh to generate." 1>&2
|
|
||||||
|
echo "Running on JVM" 1>&2
|
||||||
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m
|
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m
|
||||||
fi
|
fi
|
||||||
|
@@ -23,6 +23,8 @@ if [ ! -f target/CalculateAverage_roman_r_m_image ]; then
|
|||||||
|
|
||||||
JAVA_OPTS="--enable-preview -dsa"
|
JAVA_OPTS="--enable-preview -dsa"
|
||||||
NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_roman_r_m --gc=epsilon -Ob -O3 -march=native --strict-image-heap $JAVA_OPTS"
|
NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_roman_r_m --gc=epsilon -Ob -O3 -march=native --strict-image-heap $JAVA_OPTS"
|
||||||
|
NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -R:MaxHeapSize=128m"
|
||||||
|
NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -H:+UnlockExperimentalVMOptions -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions"
|
||||||
|
|
||||||
native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_roman_r_m_image dev.morling.onebrc.CalculateAverage_roman_r_m
|
native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_roman_r_m_image dev.morling.onebrc.CalculateAverage_roman_r_m
|
||||||
fi
|
fi
|
@@ -40,6 +40,7 @@ public class CalculateAverage_roman_r_m {
|
|||||||
private static final long SEMICOLON_MASK = broadcast((byte) ';');
|
private static final long SEMICOLON_MASK = broadcast((byte) ';');
|
||||||
private static final long LINE_END_MASK = broadcast((byte) '\n');
|
private static final long LINE_END_MASK = broadcast((byte) '\n');
|
||||||
private static final long DOT_MASK = broadcast((byte) '.');
|
private static final long DOT_MASK = broadcast((byte) '.');
|
||||||
|
private static final long ZEROES_MASK = broadcast((byte) '0');
|
||||||
|
|
||||||
// from netty
|
// from netty
|
||||||
|
|
||||||
@@ -64,6 +65,15 @@ public class CalculateAverage_roman_r_m {
|
|||||||
return start + Long.numberOfTrailingZeros(i) / 8;
|
return start + Long.numberOfTrailingZeros(i) / 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int hashFull(long word) {
|
||||||
|
return (int) (word ^ (word >>> 32));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int hashPartial(long word, int bytes) {
|
||||||
|
long h = Long.reverseBytes(word) >>> (8 * (8 - bytes));
|
||||||
|
return (int) (h ^ (h >>> 32));
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
Field f = Unsafe.class.getDeclaredField("theUnsafe");
|
Field f = Unsafe.class.getDeclaredField("theUnsafe");
|
||||||
f.setAccessible(true);
|
f.setAccessible(true);
|
||||||
@@ -96,34 +106,37 @@ public class CalculateAverage_roman_r_m {
|
|||||||
var station = new ByteString(segment);
|
var station = new ByteString(segment);
|
||||||
long offset = segment.address();
|
long offset = segment.address();
|
||||||
long end = offset + segment.byteSize();
|
long end = offset + segment.byteSize();
|
||||||
|
long tailMask;
|
||||||
while (offset < end) {
|
while (offset < end) {
|
||||||
// parsing station name
|
// parsing station name
|
||||||
long start = offset;
|
long start = offset;
|
||||||
long next = UNSAFE.getLong(offset);
|
long next = UNSAFE.getLong(offset);
|
||||||
long pattern = applyPattern(next, SEMICOLON_MASK);
|
long pattern = applyPattern(next, SEMICOLON_MASK);
|
||||||
int bytes;
|
int bytes;
|
||||||
if (pattern != 0) {
|
if (pattern == 0) {
|
||||||
bytes = Long.numberOfTrailingZeros(pattern) / 8;
|
station.hash = hashFull(next);
|
||||||
offset += bytes;
|
do {
|
||||||
long h = Long.reverseBytes(next) >>> (8 * (8 - bytes));
|
|
||||||
station.hash = (int) (h ^ (h >>> 32));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
long h = next;
|
|
||||||
station.hash = (int) (h ^ (h >>> 32));
|
|
||||||
while (pattern == 0) {
|
|
||||||
offset += 8;
|
offset += 8;
|
||||||
next = UNSAFE.getLong(offset);
|
next = UNSAFE.getLong(offset);
|
||||||
pattern = applyPattern(next, SEMICOLON_MASK);
|
pattern = applyPattern(next, SEMICOLON_MASK);
|
||||||
}
|
} while (pattern == 0);
|
||||||
|
|
||||||
bytes = Long.numberOfTrailingZeros(pattern) / 8;
|
bytes = Long.numberOfTrailingZeros(pattern) / 8;
|
||||||
offset += bytes;
|
offset += bytes;
|
||||||
|
tailMask = ((1L << (8 * bytes)) - 1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bytes = Long.numberOfTrailingZeros(pattern) / 8;
|
||||||
|
offset += bytes;
|
||||||
|
tailMask = ((1L << (8 * bytes)) - 1);
|
||||||
|
|
||||||
|
station.hash = hashPartial(next, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
int len = (int) (offset - start);
|
int len = (int) (offset - start);
|
||||||
station.offset = start;
|
station.offset = start;
|
||||||
station.len = len;
|
station.len = len;
|
||||||
station.tail = next & ((1L << (8 * bytes)) - 1);
|
station.tail = next & tailMask;
|
||||||
|
|
||||||
offset++;
|
offset++;
|
||||||
|
|
||||||
@@ -140,7 +153,7 @@ public class CalculateAverage_roman_r_m {
|
|||||||
long numLen = applyPattern(encodedVal, DOT_MASK);
|
long numLen = applyPattern(encodedVal, DOT_MASK);
|
||||||
numLen = Long.numberOfTrailingZeros(numLen) / 8;
|
numLen = Long.numberOfTrailingZeros(numLen) / 8;
|
||||||
|
|
||||||
encodedVal ^= broadcast((byte) 0x30);
|
encodedVal ^= ZEROES_MASK;
|
||||||
|
|
||||||
int intPart = (int) (encodedVal & ((1 << (8 * numLen)) - 1));
|
int intPart = (int) (encodedVal & ((1 << (8 * numLen)) - 1));
|
||||||
intPart <<= 8 * (2 - numLen);
|
intPart <<= 8 * (2 - numLen);
|
||||||
@@ -285,24 +298,31 @@ public class CalculateAverage_roman_r_m {
|
|||||||
int h = s.hashCode();
|
int h = s.hashCode();
|
||||||
int idx = (SIZE - 1) & h;
|
int idx = (SIZE - 1) & h;
|
||||||
|
|
||||||
|
var keys = this.keys;
|
||||||
|
|
||||||
|
int idx0 = idx;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while (keys[idx] != null && !keys[idx].equals(s)) {
|
while (true) {
|
||||||
i++;
|
if (keys[idx] != null && keys[idx].equals(s)) {
|
||||||
idx = (idx + i * i) % SIZE;
|
values[idx][0] = Math.min(values[idx][0], value);
|
||||||
|
values[idx][1] = Math.max(values[idx][1], value);
|
||||||
|
values[idx][2] += value;
|
||||||
|
values[idx][3] += 1;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if (keys[idx] == null) {
|
else if (keys[idx] == null) {
|
||||||
keys[idx] = s.copy();
|
keys[idx] = s.copy();
|
||||||
values[idx] = new int[4];
|
values[idx] = new int[4];
|
||||||
values[idx][0] = value;
|
values[idx][0] = value;
|
||||||
values[idx][1] = value;
|
values[idx][1] = value;
|
||||||
values[idx][2] = value;
|
values[idx][2] = value;
|
||||||
values[idx][3] = 1;
|
values[idx][3] = 1;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
values[idx][0] = Math.min(values[idx][0], value);
|
i++;
|
||||||
values[idx][1] = Math.max(values[idx][1], value);
|
idx = (idx0 + i * i) % SIZE;
|
||||||
values[idx][2] += value;
|
}
|
||||||
values[idx][3] += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user