serkan-ozal's 3rd submission with some minor improvements: (#615)

- faster merge by ignoring empty entries in the map
- enable CDS for faster startup (added `prepare_serkan-ozal.sh` to generate CDS archive in advance)
- some tweaks with JVM options
- optimized result printing
This commit is contained in:
Serkan ÖZAL 2024-01-29 01:02:01 +03:00 committed by GitHub
parent ff35a4628b
commit 46d375e621
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 85 additions and 18 deletions

View File

@ -16,9 +16,12 @@
# #
JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector " JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector "
JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions"
JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000" JAVA_OPTS="$JAVA_OPTS -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000"
JAVA_OPTS="$JAVA_OPTS -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0"
JAVA_OPTS="$JAVA_OPTS -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI"
JAVA_OPTS="$JAVA_OPTS -XX:SharedArchiveFile=target/CalculateAverage_serkan_ozal_cds.jsa"
JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0" JAVA_OPTS="$JAVA_OPTS -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"
#JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -Xms256m -Xmx256m -XX:+AlwaysPreTouch"
if [[ ! "$(uname -s)" = "Darwin" ]]; then if [[ ! "$(uname -s)" = "Darwin" ]]; then
JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages" JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages"
fi fi

42
prepare_serkan-ozal.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/bash
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Select the JDK through SDKMAN; its status output is redirected to stderr.
source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.1-open 1>&2

# JVM options shared by every java invocation below.
JAVA_OPTS="--enable-preview --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector "
JAVA_OPTS+=" -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions"
JAVA_OPTS+=" -XX:-TieredCompilation -XX:MaxInlineSize=10000 -XX:InlineSmallCode=10000 -XX:FreqInlineSize=10000"
JAVA_OPTS+=" -XX:-UseCountedLoopSafepoints -XX:GuaranteedSafepointInterval=0"
JAVA_OPTS+=" -XX:+TrustFinalNonStaticFields -da -dsa -XX:+UseNUMA -XX:-EnableJVMCI"
JAVA_OPTS+=" -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"
# Input file for the runs below, passed as the file.path system property.
JAVA_OPTS+=" -Dfile.path=src/test/resources/samples/measurements-10000-unique-keys.txt"
# Transparent huge pages are requested on every platform except macOS.
if [[ "$(uname -s)" != "Darwin" ]]; then
    JAVA_OPTS+=" -XX:+UseTransparentHugePages"
fi

# Set configs
export USE_SHARED_ARENA=true USE_SHARED_REGION=true CLOSE_STDOUT_ON_RESULT=true

CLASS_NAME="CalculateAverage_serkan_ozal"
MAIN_CLASS="dev.morling.onebrc.${CLASS_NAME}"
APP_JAR="target/average-1.0.0-SNAPSHOT.jar"
CLASS_LIST="target/${CLASS_NAME}.classlist"
BASE_ARCHIVE="target/${CLASS_NAME}.jsa"
FINAL_ARCHIVE="target/${CLASS_NAME}_cds.jsa"

# Create CDS archive
# NOTE: ${JAVA_OPTS} is intentionally unquoted so it splits into separate arguments.

# Step 1: run the program with sharing off and dump the list of loaded classes.
java ${JAVA_OPTS} -Xshare:off \
    -XX:DumpLoadedClassList="${CLASS_LIST}" \
    --class-path "${APP_JAR}" "${MAIN_CLASS}"

# Step 2: dump a shared archive from that class list (no main class is run).
java ${JAVA_OPTS} -Xshare:dump \
    -XX:SharedClassListFile="${CLASS_LIST}" \
    -XX:SharedArchiveFile="${BASE_ARCHIVE}" \
    --class-path "${APP_JAR}"

# Step 3: run again on top of the step-2 archive and write the final archive at exit.
java ${JAVA_OPTS} -Xshare:on \
    -XX:SharedArchiveFile="${BASE_ARCHIVE}" \
    -XX:ArchiveClassesAtExit="${FINAL_ARCHIVE}" \
    --class-path "${APP_JAR}" "${MAIN_CLASS}"

View File

@ -30,6 +30,7 @@ import java.nio.ByteOrder;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
@ -47,7 +48,7 @@ import java.util.concurrent.locks.ReentrantLock;
*/ */
public class CalculateAverage_serkan_ozal { public class CalculateAverage_serkan_ozal {
private static final String FILE = "./measurements.txt"; private static final String FILE = System.getProperty("file.path", "./measurements.txt");
private static final VectorSpecies<Byte> BYTE_SPECIES = ByteVector.SPECIES_PREFERRED.length() >= 16 private static final VectorSpecies<Byte> BYTE_SPECIES = ByteVector.SPECIES_PREFERRED.length() >= 16
// Since majority (99%) of the city names <= 16 bytes, according to my experiments, // Since majority (99%) of the city names <= 16 bytes, according to my experiments,
@ -327,7 +328,7 @@ public class CalculateAverage_serkan_ozal {
private void doProcessRegion(MemorySegment region, long regionAddress, long regionStart, long regionEnd) { private void doProcessRegion(MemorySegment region, long regionAddress, long regionStart, long regionEnd) {
final int vectorSize = BYTE_SPECIES.vectorByteSize(); final int vectorSize = BYTE_SPECIES.vectorByteSize();
final long regionMainLimit = regionEnd - MAX_LINE_LENGTH; final long regionMainLimit = regionEnd - BYTE_SPECIES_SIZE;
long regionPtr; long regionPtr;
@ -515,7 +516,20 @@ public class CalculateAverage_serkan_ozal {
} }
// Prints the collected results to standard output on a single line in the form
// {key=value, key=value, ...}.
// NOTE(review): this is a side-by-side diff view, so each line below may carry the removed
// (left) and the added (right) version of the statement back to back.
private void print() { private void print() {
// Left side: the map was printed directly. Right side: the text is assembled by hand
// in a StringBuilder created with an initial capacity of 1 << 14 (16384) chars.
// NOTE(review): presumably the hand-built text is meant to match the map's own toString() - confirm.
System.out.println(resultMap); StringBuilder sb = new StringBuilder(1 << 14);
// Tracks whether a ", " separator is needed before the next entry.
boolean firstEntryAppended = false;
sb.append("{");
for (Map.Entry<String, KeyResult> e : resultMap.entrySet()) {
if (firstEntryAppended) {
sb.append(", ");
}
String key = e.getKey();
KeyResult value = e.getValue();
// append(Object) renders the value through String.valueOf, i.e. its toString().
sb.append(key).append("=").append(value);
firstEntryAppended = true;
}
sb.append('}');
// Emit the whole result with a single println call.
System.out.println(sb);
} }
} }
@ -546,8 +560,12 @@ public class CalculateAverage_serkan_ozal {
private static final int ENTRY_HASH_MASK = MAP_CAPACITY - 1; private static final int ENTRY_HASH_MASK = MAP_CAPACITY - 1;
private static final int MAP_SIZE = ENTRY_SIZE * MAP_CAPACITY; private static final int MAP_SIZE = ENTRY_SIZE * MAP_CAPACITY;
private static final int ENTRY_MASK = MAP_SIZE - 1; private static final int ENTRY_MASK = MAP_SIZE - 1;
private static final int KEY_ARRAY_OFFSET = KEY_OFFSET - Unsafe.ARRAY_BYTE_BASE_OFFSET;
private final byte[] data; private final byte[] data;
// Max number of unique keys is 10K, so 1 << 14 (16384) is large enough to hold offsets for all of them
private final long[] entryOffsets = new long[1 << 14];
private int entryOffsetIdx = 0;
private OpenMap() { private OpenMap() {
this.data = new byte[MAP_SIZE]; this.data = new byte[MAP_SIZE];
@ -579,7 +597,6 @@ public class CalculateAverage_serkan_ozal {
// and continue until find an available slot in case of hash collision // and continue until find an available slot in case of hash collision
// TODO Prevent infinite loop if all the slots are in use for other keys // TODO Prevent infinite loop if all the slots are in use for other keys
for (long entryOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET + (idx * ENTRY_SIZE);; entryOffset = (entryOffset + ENTRY_SIZE) & ENTRY_MASK) { for (long entryOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET + (idx * ENTRY_SIZE);; entryOffset = (entryOffset + ENTRY_SIZE) & ENTRY_MASK) {
int keyStartOffset = (int) entryOffset + KEY_OFFSET;
int keySize = U.getInt(data, entryOffset + KEY_SIZE_OFFSET); int keySize = U.getInt(data, entryOffset + KEY_SIZE_OFFSET);
// Check whether current index is empty (no another key is inserted yet) // Check whether current index is empty (no another key is inserted yet)
if (keySize == 0) { if (keySize == 0) {
@ -587,26 +604,28 @@ public class CalculateAverage_serkan_ozal {
U.putShort(data, entryOffset + MIN_VALUE_OFFSET, Short.MAX_VALUE); U.putShort(data, entryOffset + MIN_VALUE_OFFSET, Short.MAX_VALUE);
U.putShort(data, entryOffset + MAX_VALUE_OFFSET, Short.MIN_VALUE); U.putShort(data, entryOffset + MAX_VALUE_OFFSET, Short.MIN_VALUE);
U.putInt(data, entryOffset + KEY_SIZE_OFFSET, keyLength); U.putInt(data, entryOffset + KEY_SIZE_OFFSET, keyLength);
U.copyMemory(null, keyStartAddress, data, keyStartOffset, keyLength); U.copyMemory(null, keyStartAddress, data, entryOffset + KEY_OFFSET, keyLength);
entryOffsets[entryOffsetIdx++] = entryOffset;
return entryOffset; return entryOffset;
} }
int keyStartArrayOffset = (int) entryOffset + KEY_ARRAY_OFFSET;
// Check for hash collision (hashes are same, but keys are different). // Check for hash collision (hashes are same, but keys are different).
// If there is no collision (both hashes and keys are equals), return current slot's offset. // If there is no collision (both hashes and keys are equals), return current slot's offset.
// Otherwise, continue iterating until find an available slot. // Otherwise, continue iterating until find an available slot.
if (keySize == keyLength && keysEqual(keyVector, keyStartAddress, keyLength, keyStartOffset)) { if (keySize == keyLength && keysEqual(keyVector, keyStartAddress, keyLength, keyStartArrayOffset)) {
return entryOffset; return entryOffset;
} }
} }
} }
private boolean keysEqual(ByteVector keyVector, long keyStartAddress, int keyLength, int keyStartOffset) { private boolean keysEqual(ByteVector keyVector, long keyStartAddress, int keyLength, int keyStartArrayOffset) {
int keyCheckIdx = 0; int keyCheckIdx = 0;
if (keyVector != null) { if (keyVector != null) {
// Use vectorized search for the comparison of keys. // Use vectorized search for the comparison of keys.
// Since majority of the city names >= 8 bytes and <= 16 bytes, // Since majority of the city names >= 8 bytes and <= 16 bytes,
// this way is more efficient (according to my experiments) than any other comparisons (byte by byte or 2 longs). // this way is more efficient (according to my experiments) than any other comparisons (byte by byte or 2 longs).
int keyCheckLength = Math.min(BYTE_SPECIES_SIZE, keyLength); int keyCheckLength = Math.min(BYTE_SPECIES_SIZE, keyLength);
ByteVector entryKeyVector = ByteVector.fromArray(BYTE_SPECIES, data, keyStartOffset - Unsafe.ARRAY_BYTE_BASE_OFFSET); ByteVector entryKeyVector = ByteVector.fromArray(BYTE_SPECIES, data, keyStartArrayOffset);
long eqMask = keyVector.compare(VectorOperators.EQ, entryKeyVector).toLong(); long eqMask = keyVector.compare(VectorOperators.EQ, entryKeyVector).toLong();
int eqCount = Long.numberOfTrailingZeros(~eqMask); int eqCount = Long.numberOfTrailingZeros(~eqMask);
if (eqCount < keyCheckLength) { if (eqCount < keyCheckLength) {
@ -625,6 +644,7 @@ public class CalculateAverage_serkan_ozal {
normalizedKeyLength = Integer.reverseBytes(normalizedKeyLength); normalizedKeyLength = Integer.reverseBytes(normalizedKeyLength);
} }
long keyStartOffset = keyStartArrayOffset + Unsafe.ARRAY_BYTE_BASE_OFFSET;
int alignedKeyLength = normalizedKeyLength & 0xFFFFFFF8; int alignedKeyLength = normalizedKeyLength & 0xFFFFFFF8;
int i; int i;
for (i = keyCheckIdx; i < alignedKeyLength; i += Long.BYTES) { for (i = keyCheckIdx; i < alignedKeyLength; i += Long.BYTES) {
@ -663,18 +683,20 @@ public class CalculateAverage_serkan_ozal {
private void merge(Map<String, KeyResult> resultMap) { private void merge(Map<String, KeyResult> resultMap) {
// Merge this local map into global result map // Merge this local map into global result map
for (int i = 0; i < MAP_SIZE; i += ENTRY_SIZE) { Arrays.sort(entryOffsets, 0, entryOffsetIdx);
int baseOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET + i; for (int i = 0; i < entryOffsetIdx; i++) {
int keyLength = U.getInt(data, baseOffset + KEY_SIZE_OFFSET); long entryOffset = entryOffsets[i];
int keyLength = U.getInt(data, entryOffset + KEY_SIZE_OFFSET);
if (keyLength == 0) { if (keyLength == 0) {
// No entry is available for this index, so continue iterating // No entry is available for this index, so continue iterating
continue; continue;
} }
String key = new String(data, i + KEY_OFFSET, keyLength, StandardCharsets.UTF_8); int entryArrayIdx = (int) (entryOffset + KEY_OFFSET - Unsafe.ARRAY_BYTE_BASE_OFFSET);
int count = U.getInt(data, baseOffset + COUNT_OFFSET); String key = new String(data, entryArrayIdx, keyLength, StandardCharsets.UTF_8);
short minValue = U.getShort(data, baseOffset + MIN_VALUE_OFFSET); int count = U.getInt(data, entryOffset + COUNT_OFFSET);
short maxValue = U.getShort(data, baseOffset + MAX_VALUE_OFFSET); short minValue = U.getShort(data, entryOffset + MIN_VALUE_OFFSET);
long sum = U.getLong(data, baseOffset + VALUE_SUM_OFFSET); short maxValue = U.getShort(data, entryOffset + MAX_VALUE_OFFSET);
long sum = U.getLong(data, entryOffset + VALUE_SUM_OFFSET);
KeyResult result = new KeyResult(count, minValue, maxValue, sum); KeyResult result = new KeyResult(count, minValue, maxValue, sum);
KeyResult existingResult = resultMap.get(key); KeyResult existingResult = resultMap.get(key);
if (existingResult == null) { if (existingResult == null) {