serkan-ozal's 7th submission: (#679)
- Use smaller regions (i.e. a higher region count) so that workers that finish their tasks early spend less time idle.
- Remove some configuration-related work during initialization, which will hopefully save a few tens of milliseconds.
- Reorder the temperature value parsing instructions to (hopefully) benefit more from ILP (instruction-level parallelism).
This commit is contained in:
parent
9b9bb8ed3f
commit
6a2e5058af
@ -18,7 +18,7 @@
|
|||||||
JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector "
|
JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector "
|
||||||
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions"
|
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions"
|
||||||
JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000"
|
JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000"
|
||||||
JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0"
|
JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:LoopStripMiningIter=0 -XX:GuaranteedSafepointInterval=0"
|
||||||
JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI"
|
JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI"
|
||||||
JAVA_OPTS="$JAVA_OPTS -XX:SharedArchiveFile=target/CalculateAverage_serkan_ozal_cds.jsa"
|
JAVA_OPTS="$JAVA_OPTS -XX:SharedArchiveFile=target/CalculateAverage_serkan_ozal_cds.jsa"
|
||||||
JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"
|
JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"
|
||||||
@ -26,10 +26,8 @@ if [[ ! "$(uname -s)" = "Darwin" ]]; then
|
|||||||
JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages"
|
JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CONFIGS="USE_SHARED_ARENA=true USE_SHARED_REGION=true CLOSE_STDOUT_ON_RESULT=true REGION_COUNT=128"
|
|
||||||
|
|
||||||
#echo "Process started at $(date +%s%N | cut -b1-13)"
|
#echo "Process started at $(date +%s%N | cut -b1-13)"
|
||||||
eval "exec 3< <({ $CONFIGS java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_serkan_ozal; })"
|
eval "exec 3< <({ java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_serkan_ozal; })"
|
||||||
read <&3 result
|
read <&3 result
|
||||||
echo -e "$result"
|
echo -e "$result"
|
||||||
#echo "Process finished at $(date +%s%N | cut -b1-13)"
|
#echo "Process finished at $(date +%s%N | cut -b1-13)"
|
||||||
|
@ -68,15 +68,15 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
|
|
||||||
// Get configurations
|
// Get configurations
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
private static final boolean VERBOSE = getBooleanConfig("VERBOSE", false);
|
private static final boolean VERBOSE = false; // getBooleanConfig("VERBOSE", false);
|
||||||
private static final int THREAD_COUNT = getIntegerConfig("THREAD_COUNT", Runtime.getRuntime().availableProcessors());
|
private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors(); // getIntegerConfig("THREAD_COUNT", Runtime.getRuntime().availableProcessors());
|
||||||
private static final boolean USE_VTHREADS = getBooleanConfig("USE_VTHREADS", false);
|
private static final boolean USE_VTHREADS = false; // getBooleanConfig("USE_VTHREADS", false);
|
||||||
private static final int VTHREAD_COUNT = getIntegerConfig("VTHREAD_COUNT", 1024);
|
private static final int VTHREAD_COUNT = 1024; // getIntegerConfig("VTHREAD_COUNT", 1024);
|
||||||
private static final int REGION_COUNT = getIntegerConfig("REGION_COUNT", -1);
|
private static final int REGION_COUNT = 256; // getIntegerConfig("REGION_COUNT", -1);
|
||||||
private static final boolean USE_SHARED_ARENA = getBooleanConfig("USE_SHARED_ARENA", true);
|
private static final boolean USE_SHARED_ARENA = true; // getBooleanConfig("USE_SHARED_ARENA", true);
|
||||||
private static final boolean USE_SHARED_REGION = getBooleanConfig("USE_SHARED_REGION", true);
|
private static final boolean USE_SHARED_REGION = true; // getBooleanConfig("USE_SHARED_REGION", true);
|
||||||
private static final int MAP_CAPACITY = getIntegerConfig("MAP_CAPACITY", 1 << 17);
|
private static final int MAP_CAPACITY = 1 << 17; // getIntegerConfig("MAP_CAPACITY", 1 << 17);
|
||||||
private static final boolean CLOSE_STDOUT_ON_RESULT = getBooleanConfig("CLOSE_STDOUT_ON_RESULT", true);
|
private static final boolean CLOSE_STDOUT_ON_RESULT = true; // getBooleanConfig("CLOSE_STDOUT_ON_RESULT", true);
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// My dear old friend Unsafe
|
// My dear old friend Unsafe
|
||||||
@ -346,10 +346,16 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
// Credits: merykitty
|
// Credits: merykitty
|
||||||
private long extractValue(long regionPtr, long word, OpenMap map, int entryOffset) {
|
private long extractValue(long regionPtr, long word, OpenMap map, int entryOffset) {
|
||||||
// Parse and extract value
|
// Parse and extract value
|
||||||
int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000);
|
|
||||||
int shift = 28 - decimalSepPos;
|
// 1. level instruction set (no dependency between each other so can be run in parallel)
|
||||||
long signed = (~word << 59) >> 63;
|
long signed = (~word << 59) >> 63;
|
||||||
|
int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000);
|
||||||
|
|
||||||
|
// 2. level instruction set (no dependency between each other so can be run in parallel)
|
||||||
|
long nextPtr = regionPtr + (decimalSepPos >>> 3) + 3;
|
||||||
|
int shift = 28 - decimalSepPos;
|
||||||
long designMask = ~(signed & 0xFF);
|
long designMask = ~(signed & 0xFF);
|
||||||
|
|
||||||
long digits = ((word & designMask) << shift) & 0x0F000F0F00L;
|
long digits = ((word & designMask) << shift) & 0x0F000F0F00L;
|
||||||
long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
|
long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
|
||||||
int value = (int) ((absValue ^ signed) - signed);
|
int value = (int) ((absValue ^ signed) - signed);
|
||||||
@ -358,12 +364,10 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
map.putValue(entryOffset, value);
|
map.putValue(entryOffset, value);
|
||||||
|
|
||||||
// Return new position
|
// Return new position
|
||||||
return regionPtr + (decimalSepPos >>> 3) + 3;
|
return nextPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doProcessRegion(long regionStart, long regionEnd) {
|
private void doProcessRegion(long regionStart, long regionEnd) {
|
||||||
final int vectorSize = BYTE_SPECIES.vectorByteSize();
|
|
||||||
|
|
||||||
final long size = regionEnd - regionStart;
|
final long size = regionEnd - regionStart;
|
||||||
final long segmentSize = size / 2;
|
final long segmentSize = size / 2;
|
||||||
|
|
||||||
@ -392,26 +396,26 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
||||||
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
||||||
|
|
||||||
if (keyLength1 != vectorSize && keyLength2 != vectorSize) {
|
if (keyLength1 != BYTE_SPECIES_SIZE && keyLength2 != BYTE_SPECIES_SIZE) {
|
||||||
regionPtr1 += (keyLength1 + 1);
|
regionPtr1 += (keyLength1 + 1);
|
||||||
regionPtr2 += (keyLength2 + 1);
|
regionPtr2 += (keyLength2 + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (keyLength1 != vectorSize) {
|
if (keyLength1 != BYTE_SPECIES_SIZE) {
|
||||||
regionPtr1 += (keyLength1 + 1);
|
regionPtr1 += (keyLength1 + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
regionPtr1 += vectorSize;
|
regionPtr1 += BYTE_SPECIES_SIZE;
|
||||||
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
|
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
|
||||||
;
|
;
|
||||||
keyLength1 = (int) (regionPtr1 - keyStartPtr1);
|
keyLength1 = (int) (regionPtr1 - keyStartPtr1);
|
||||||
regionPtr1++;
|
regionPtr1++;
|
||||||
}
|
}
|
||||||
if (keyLength2 != vectorSize) {
|
if (keyLength2 != BYTE_SPECIES_SIZE) {
|
||||||
regionPtr2 += (keyLength2 + 1);
|
regionPtr2 += (keyLength2 + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
regionPtr2 += vectorSize;
|
regionPtr2 += BYTE_SPECIES_SIZE;
|
||||||
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
|
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
|
||||||
;
|
;
|
||||||
keyLength2 = (int) (regionPtr2 - keyStartPtr2);
|
keyLength2 = (int) (regionPtr2 - keyStartPtr2);
|
||||||
@ -431,28 +435,28 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
// Calculate key hashes and find entry indexes
|
// Calculate key hashes and find entry indexes
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
int x1, y1, x2, y2;
|
int x1, y1, x2, y2;
|
||||||
if (keyLength1 >= Integer.BYTES && keyLength2 >= Integer.BYTES) {
|
if (keyLength1 > 3 && keyLength2 > 3) {
|
||||||
x1 = U.getInt(keyStartPtr1);
|
x1 = U.getInt(keyStartPtr1);
|
||||||
y1 = U.getInt(keyStartPtr1 + keyLength1 - Integer.BYTES);
|
y1 = U.getInt(regionPtr1 - 5);
|
||||||
x2 = U.getInt(keyStartPtr2);
|
x2 = U.getInt(keyStartPtr2);
|
||||||
y2 = U.getInt(keyStartPtr2 + keyLength2 - Integer.BYTES);
|
y2 = U.getInt(regionPtr2 - 5);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (keyLength1 >= Integer.BYTES) {
|
if (keyLength1 > 3) {
|
||||||
x1 = U.getInt(keyStartPtr1);
|
x1 = U.getInt(keyStartPtr1);
|
||||||
y1 = U.getInt(keyStartPtr1 + keyLength1 - Integer.BYTES);
|
y1 = U.getInt(regionPtr1 - 5);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
x1 = U.getByte(keyStartPtr1);
|
x1 = U.getByte(keyStartPtr1);
|
||||||
y1 = U.getByte(keyStartPtr1 + keyLength1 - Byte.BYTES);
|
y1 = U.getByte(regionPtr1 - 2);
|
||||||
}
|
}
|
||||||
if (keyLength2 >= Integer.BYTES) {
|
if (keyLength2 > 3) {
|
||||||
x2 = U.getInt(keyStartPtr2);
|
x2 = U.getInt(keyStartPtr2);
|
||||||
y2 = U.getInt(keyStartPtr2 + keyLength2 - Integer.BYTES);
|
y2 = U.getInt(regionPtr2 - 5);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
x2 = U.getByte(keyStartPtr2);
|
x2 = U.getByte(keyStartPtr2);
|
||||||
y2 = U.getByte(keyStartPtr2 + keyLength2 - Byte.BYTES);
|
y2 = U.getByte(regionPtr2 - 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -477,19 +481,19 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Read and process region - tail
|
// Read and process region - tail
|
||||||
doProcessTail(regionPtr1, regionEnd1, regionPtr2, regionEnd2, vectorSize);
|
doProcessTail(regionPtr1, regionEnd1, regionPtr2, regionEnd2);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, long regionEnd2, int vectorSize) {
|
private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, long regionEnd2) {
|
||||||
while (regionPtr1 < regionEnd1) {
|
while (regionPtr1 < regionEnd1) {
|
||||||
long keyStartPtr1 = regionPtr1;
|
long keyStartPtr1 = regionPtr1;
|
||||||
ByteVector keyVector1 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr1, NATIVE_BYTE_ORDER);
|
ByteVector keyVector1 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr1, NATIVE_BYTE_ORDER);
|
||||||
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
||||||
if (keyLength1 != vectorSize) {
|
if (keyLength1 != BYTE_SPECIES_SIZE) {
|
||||||
regionPtr1 += (keyLength1 + 1);
|
regionPtr1 += (keyLength1 + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
regionPtr1 += vectorSize;
|
regionPtr1 += BYTE_SPECIES_SIZE;
|
||||||
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
|
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
|
||||||
;
|
;
|
||||||
keyLength1 = (int) (regionPtr1 - keyStartPtr1);
|
keyLength1 = (int) (regionPtr1 - keyStartPtr1);
|
||||||
@ -507,11 +511,11 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
long keyStartPtr2 = regionPtr2;
|
long keyStartPtr2 = regionPtr2;
|
||||||
ByteVector keyVector2 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr2, NATIVE_BYTE_ORDER);
|
ByteVector keyVector2 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr2, NATIVE_BYTE_ORDER);
|
||||||
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
|
||||||
if (keyLength2 != vectorSize) {
|
if (keyLength2 != BYTE_SPECIES_SIZE) {
|
||||||
regionPtr2 += (keyLength2 + 1);
|
regionPtr2 += (keyLength2 + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
regionPtr2 += vectorSize;
|
regionPtr2 += BYTE_SPECIES_SIZE;
|
||||||
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
|
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
|
||||||
;
|
;
|
||||||
keyLength2 = (int) (regionPtr2 - keyStartPtr2);
|
keyLength2 = (int) (regionPtr2 - keyStartPtr2);
|
||||||
@ -804,16 +808,17 @@ public class CalculateAverage_serkan_ozal {
|
|||||||
|
|
||||||
private void putValue(int entryOffset, int value) {
|
private void putValue(int entryOffset, int value) {
|
||||||
int countOffset = entryOffset + COUNT_OFFSET;
|
int countOffset = entryOffset + COUNT_OFFSET;
|
||||||
U.putInt(data, countOffset, U.getInt(data, countOffset) + 1);
|
|
||||||
int minValueOffset = entryOffset + MIN_VALUE_OFFSET;
|
int minValueOffset = entryOffset + MIN_VALUE_OFFSET;
|
||||||
|
int maxValueOffset = entryOffset + MAX_VALUE_OFFSET;
|
||||||
|
int sumOffset = entryOffset + VALUE_SUM_OFFSET;
|
||||||
|
|
||||||
|
U.putInt(data, countOffset, U.getInt(data, countOffset) + 1);
|
||||||
if (value < U.getShort(data, minValueOffset)) {
|
if (value < U.getShort(data, minValueOffset)) {
|
||||||
U.putShort(data, minValueOffset, (short) value);
|
U.putShort(data, minValueOffset, (short) value);
|
||||||
}
|
}
|
||||||
int maxValueOffset = entryOffset + MAX_VALUE_OFFSET;
|
|
||||||
if (value > U.getShort(data, maxValueOffset)) {
|
if (value > U.getShort(data, maxValueOffset)) {
|
||||||
U.putShort(data, maxValueOffset, (short) value);
|
U.putShort(data, maxValueOffset, (short) value);
|
||||||
}
|
}
|
||||||
int sumOffset = entryOffset + VALUE_SUM_OFFSET;
|
|
||||||
U.putLong(data, sumOffset, U.getLong(data, sumOffset) + value);
|
U.putLong(data, sumOffset, U.getLong(data, sumOffset) + value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user