serkan-ozal's 7th submission: (#679)

- use smaller regions (increased region count) so there will be less idle time for the workers who completed their tasks
- get rid of some configuration related stuff during initialization which might save a few tens of milliseconds hopefully
- update temperature value parsing instruction order to get benefit of ILP better (hopefully)
This commit is contained in:
Serkan ÖZAL 2024-01-31 20:21:25 +03:00 committed by GitHub
parent 9b9bb8ed3f
commit 6a2e5058af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 44 additions and 41 deletions

View File

@ -18,7 +18,7 @@
JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector " JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector "
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions" JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions"
JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000" JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000"
JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0" JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:LoopStripMiningIter=0 -XX:GuaranteedSafepointInterval=0"
JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI" JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI"
JAVA_OPTS="$JAVA_OPTS -XX:SharedArchiveFile=target/CalculateAverage_serkan_ozal_cds.jsa" JAVA_OPTS="$JAVA_OPTS -XX:SharedArchiveFile=target/CalculateAverage_serkan_ozal_cds.jsa"
JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0" JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"
@ -26,10 +26,8 @@ if [[ ! "$(uname -s)" = "Darwin" ]]; then
JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages"
fi fi
CONFIGS="USE_SHARED_ARENA=true USE_SHARED_REGION=true CLOSE_STDOUT_ON_RESULT=true REGION_COUNT=128"
#echo "Process started at $(date +%s%N | cut -b1-13)" #echo "Process started at $(date +%s%N | cut -b1-13)"
eval "exec 3< <({ $CONFIGS java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_serkan_ozal; })" eval "exec 3< <({ java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_serkan_ozal; })"
read <&3 result read <&3 result
echo -e "$result" echo -e "$result"
#echo "Process finished at $(date +%s%N | cut -b1-13)" #echo "Process finished at $(date +%s%N | cut -b1-13)"

View File

@ -68,15 +68,15 @@ public class CalculateAverage_serkan_ozal {
// Get configurations // Get configurations
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
private static final boolean VERBOSE = getBooleanConfig("VERBOSE", false); private static final boolean VERBOSE = false; // getBooleanConfig("VERBOSE", false);
private static final int THREAD_COUNT = getIntegerConfig("THREAD_COUNT", Runtime.getRuntime().availableProcessors()); private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors(); // getIntegerConfig("THREAD_COUNT", Runtime.getRuntime().availableProcessors());
private static final boolean USE_VTHREADS = getBooleanConfig("USE_VTHREADS", false); private static final boolean USE_VTHREADS = false; // getBooleanConfig("USE_VTHREADS", false);
private static final int VTHREAD_COUNT = getIntegerConfig("VTHREAD_COUNT", 1024); private static final int VTHREAD_COUNT = 1024; // getIntegerConfig("VTHREAD_COUNT", 1024);
private static final int REGION_COUNT = getIntegerConfig("REGION_COUNT", -1); private static final int REGION_COUNT = 256; // getIntegerConfig("REGION_COUNT", -1);
private static final boolean USE_SHARED_ARENA = getBooleanConfig("USE_SHARED_ARENA", true); private static final boolean USE_SHARED_ARENA = true; // getBooleanConfig("USE_SHARED_ARENA", true);
private static final boolean USE_SHARED_REGION = getBooleanConfig("USE_SHARED_REGION", true); private static final boolean USE_SHARED_REGION = true; // getBooleanConfig("USE_SHARED_REGION", true);
private static final int MAP_CAPACITY = getIntegerConfig("MAP_CAPACITY", 1 << 17); private static final int MAP_CAPACITY = 1 << 17; // getIntegerConfig("MAP_CAPACITY", 1 << 17);
private static final boolean CLOSE_STDOUT_ON_RESULT = getBooleanConfig("CLOSE_STDOUT_ON_RESULT", true); private static final boolean CLOSE_STDOUT_ON_RESULT = true; // getBooleanConfig("CLOSE_STDOUT_ON_RESULT", true);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// My dear old friend Unsafe // My dear old friend Unsafe
@ -346,10 +346,16 @@ public class CalculateAverage_serkan_ozal {
// Credits: merykitty // Credits: merykitty
private long extractValue(long regionPtr, long word, OpenMap map, int entryOffset) { private long extractValue(long regionPtr, long word, OpenMap map, int entryOffset) {
// Parse and extract value // Parse and extract value
int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000);
int shift = 28 - decimalSepPos; // 1. level instruction set (no dependency between each other so can be run in parallel)
long signed = (~word << 59) >> 63; long signed = (~word << 59) >> 63;
int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000);
// 2. level instruction set (no dependency between each other so can be run in parallel)
long nextPtr = regionPtr + (decimalSepPos >>> 3) + 3;
int shift = 28 - decimalSepPos;
long designMask = ~(signed & 0xFF); long designMask = ~(signed & 0xFF);
long digits = ((word & designMask) << shift) & 0x0F000F0F00L; long digits = ((word & designMask) << shift) & 0x0F000F0F00L;
long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF; long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
int value = (int) ((absValue ^ signed) - signed); int value = (int) ((absValue ^ signed) - signed);
@ -358,12 +364,10 @@ public class CalculateAverage_serkan_ozal {
map.putValue(entryOffset, value); map.putValue(entryOffset, value);
// Return new position // Return new position
return regionPtr + (decimalSepPos >>> 3) + 3; return nextPtr;
} }
private void doProcessRegion(long regionStart, long regionEnd) { private void doProcessRegion(long regionStart, long regionEnd) {
final int vectorSize = BYTE_SPECIES.vectorByteSize();
final long size = regionEnd - regionStart; final long size = regionEnd - regionStart;
final long segmentSize = size / 2; final long segmentSize = size / 2;
@ -392,26 +396,26 @@ public class CalculateAverage_serkan_ozal {
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
if (keyLength1 != vectorSize && keyLength2 != vectorSize) { if (keyLength1 != BYTE_SPECIES_SIZE && keyLength2 != BYTE_SPECIES_SIZE) {
regionPtr1 += (keyLength1 + 1); regionPtr1 += (keyLength1 + 1);
regionPtr2 += (keyLength2 + 1); regionPtr2 += (keyLength2 + 1);
} }
else { else {
if (keyLength1 != vectorSize) { if (keyLength1 != BYTE_SPECIES_SIZE) {
regionPtr1 += (keyLength1 + 1); regionPtr1 += (keyLength1 + 1);
} }
else { else {
regionPtr1 += vectorSize; regionPtr1 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++) for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
; ;
keyLength1 = (int) (regionPtr1 - keyStartPtr1); keyLength1 = (int) (regionPtr1 - keyStartPtr1);
regionPtr1++; regionPtr1++;
} }
if (keyLength2 != vectorSize) { if (keyLength2 != BYTE_SPECIES_SIZE) {
regionPtr2 += (keyLength2 + 1); regionPtr2 += (keyLength2 + 1);
} }
else { else {
regionPtr2 += vectorSize; regionPtr2 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++) for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
; ;
keyLength2 = (int) (regionPtr2 - keyStartPtr2); keyLength2 = (int) (regionPtr2 - keyStartPtr2);
@ -431,28 +435,28 @@ public class CalculateAverage_serkan_ozal {
// Calculate key hashes and find entry indexes // Calculate key hashes and find entry indexes
//////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////
int x1, y1, x2, y2; int x1, y1, x2, y2;
if (keyLength1 >= Integer.BYTES && keyLength2 >= Integer.BYTES) { if (keyLength1 > 3 && keyLength2 > 3) {
x1 = U.getInt(keyStartPtr1); x1 = U.getInt(keyStartPtr1);
y1 = U.getInt(keyStartPtr1 + keyLength1 - Integer.BYTES); y1 = U.getInt(regionPtr1 - 5);
x2 = U.getInt(keyStartPtr2); x2 = U.getInt(keyStartPtr2);
y2 = U.getInt(keyStartPtr2 + keyLength2 - Integer.BYTES); y2 = U.getInt(regionPtr2 - 5);
} }
else { else {
if (keyLength1 >= Integer.BYTES) { if (keyLength1 > 3) {
x1 = U.getInt(keyStartPtr1); x1 = U.getInt(keyStartPtr1);
y1 = U.getInt(keyStartPtr1 + keyLength1 - Integer.BYTES); y1 = U.getInt(regionPtr1 - 5);
} }
else { else {
x1 = U.getByte(keyStartPtr1); x1 = U.getByte(keyStartPtr1);
y1 = U.getByte(keyStartPtr1 + keyLength1 - Byte.BYTES); y1 = U.getByte(regionPtr1 - 2);
} }
if (keyLength2 >= Integer.BYTES) { if (keyLength2 > 3) {
x2 = U.getInt(keyStartPtr2); x2 = U.getInt(keyStartPtr2);
y2 = U.getInt(keyStartPtr2 + keyLength2 - Integer.BYTES); y2 = U.getInt(regionPtr2 - 5);
} }
else { else {
x2 = U.getByte(keyStartPtr2); x2 = U.getByte(keyStartPtr2);
y2 = U.getByte(keyStartPtr2 + keyLength2 - Byte.BYTES); y2 = U.getByte(regionPtr2 - 2);
} }
} }
@ -477,19 +481,19 @@ public class CalculateAverage_serkan_ozal {
} }
// Read and process region - tail // Read and process region - tail
doProcessTail(regionPtr1, regionEnd1, regionPtr2, regionEnd2, vectorSize); doProcessTail(regionPtr1, regionEnd1, regionPtr2, regionEnd2);
} }
private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, long regionEnd2, int vectorSize) { private void doProcessTail(long regionPtr1, long regionEnd1, long regionPtr2, long regionEnd2) {
while (regionPtr1 < regionEnd1) { while (regionPtr1 < regionEnd1) {
long keyStartPtr1 = regionPtr1; long keyStartPtr1 = regionPtr1;
ByteVector keyVector1 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr1, NATIVE_BYTE_ORDER); ByteVector keyVector1 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr1, NATIVE_BYTE_ORDER);
int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); int keyLength1 = keyVector1.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
if (keyLength1 != vectorSize) { if (keyLength1 != BYTE_SPECIES_SIZE) {
regionPtr1 += (keyLength1 + 1); regionPtr1 += (keyLength1 + 1);
} }
else { else {
regionPtr1 += vectorSize; regionPtr1 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++) for (; U.getByte(regionPtr1) != KEY_VALUE_SEPARATOR; regionPtr1++)
; ;
keyLength1 = (int) (regionPtr1 - keyStartPtr1); keyLength1 = (int) (regionPtr1 - keyStartPtr1);
@ -507,11 +511,11 @@ public class CalculateAverage_serkan_ozal {
long keyStartPtr2 = regionPtr2; long keyStartPtr2 = regionPtr2;
ByteVector keyVector2 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr2, NATIVE_BYTE_ORDER); ByteVector keyVector2 = ByteVector.fromMemorySegment(BYTE_SPECIES, NULL, regionPtr2, NATIVE_BYTE_ORDER);
int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue(); int keyLength2 = keyVector2.compare(VectorOperators.EQ, KEY_VALUE_SEPARATOR).firstTrue();
if (keyLength2 != vectorSize) { if (keyLength2 != BYTE_SPECIES_SIZE) {
regionPtr2 += (keyLength2 + 1); regionPtr2 += (keyLength2 + 1);
} }
else { else {
regionPtr2 += vectorSize; regionPtr2 += BYTE_SPECIES_SIZE;
for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++) for (; U.getByte(regionPtr2) != KEY_VALUE_SEPARATOR; regionPtr2++)
; ;
keyLength2 = (int) (regionPtr2 - keyStartPtr2); keyLength2 = (int) (regionPtr2 - keyStartPtr2);
@ -804,16 +808,17 @@ public class CalculateAverage_serkan_ozal {
private void putValue(int entryOffset, int value) { private void putValue(int entryOffset, int value) {
int countOffset = entryOffset + COUNT_OFFSET; int countOffset = entryOffset + COUNT_OFFSET;
U.putInt(data, countOffset, U.getInt(data, countOffset) + 1);
int minValueOffset = entryOffset + MIN_VALUE_OFFSET; int minValueOffset = entryOffset + MIN_VALUE_OFFSET;
int maxValueOffset = entryOffset + MAX_VALUE_OFFSET;
int sumOffset = entryOffset + VALUE_SUM_OFFSET;
U.putInt(data, countOffset, U.getInt(data, countOffset) + 1);
if (value < U.getShort(data, minValueOffset)) { if (value < U.getShort(data, minValueOffset)) {
U.putShort(data, minValueOffset, (short) value); U.putShort(data, minValueOffset, (short) value);
} }
int maxValueOffset = entryOffset + MAX_VALUE_OFFSET;
if (value > U.getShort(data, maxValueOffset)) { if (value > U.getShort(data, maxValueOffset)) {
U.putShort(data, maxValueOffset, (short) value); U.putShort(data, maxValueOffset, (short) value);
} }
int sumOffset = entryOffset + VALUE_SUM_OFFSET;
U.putLong(data, sumOffset, U.getLong(data, sumOffset) + value); U.putLong(data, sumOffset, U.getLong(data, sumOffset) + value);
} }