Change chunking formula and do some refactoring

This commit is contained in:
Vasily Zernin 2024-01-13 18:12:38 +00:00 committed by Gunnar Morling
parent 4265c7e9a8
commit 32143b2a4c

View File

@ -32,7 +32,7 @@ import java.util.concurrent.Future;
public class CalculateAverage_zerninv { public class CalculateAverage_zerninv {
private static final String FILE = "./measurements.txt"; private static final String FILE = "./measurements.txt";
private static final int MIN_CHUNK_SIZE = 1024 * 1024 * 16; private static final int MIN_FILE_SIZE = 1024 * 1024 * 16;
private static final char DELIMITER = ';'; private static final char DELIMITER = ';';
private static final char LINE_SEPARATOR = '\n'; private static final char LINE_SEPARATOR = '\n';
private static final char ZERO = '0'; private static final char ZERO = '0';
@ -48,10 +48,8 @@ public class CalculateAverage_zerninv {
var memorySegment = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); var memorySegment = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global());
long address = memorySegment.address(); long address = memorySegment.address();
var cores = Runtime.getRuntime().availableProcessors(); var cores = Runtime.getRuntime().availableProcessors();
var chunkAmount = cores - 1; var minChunkSize = fileSize < MIN_FILE_SIZE ? fileSize : fileSize / cores;
// var maxChunkSize = Math.min(fileSize, MIN_CHUNK_SIZE); var chunks = splitByChunks(address, address + fileSize, minChunkSize);
var maxChunkSize = fileSize < MIN_CHUNK_SIZE ? fileSize : fileSize / chunkAmount;
var chunks = splitByChunks(address, address + fileSize, maxChunkSize);
var executor = Executors.newFixedThreadPool(cores); var executor = Executors.newFixedThreadPool(cores);
List<Future<Map<String, MeasurementAggregation>>> fResults = new ArrayList<>(); List<Future<Map<String, MeasurementAggregation>>> fResults = new ArrayList<>();
@ -97,15 +95,13 @@ public class CalculateAverage_zerninv {
} }
} }
private static List<Long> splitByChunks(long address, long end, long maxChunkSize) { private static List<Long> splitByChunks(long address, long end, long minChunkSize) {
List<Long> result = new ArrayList<>(); List<Long> result = new ArrayList<>();
result.add(address); result.add(address);
while (address < end) { while (address < end) {
long ptr = address + Math.min(end - address, maxChunkSize) - 1; address += Math.min(end - address, minChunkSize);
while (UNSAFE.getByte(ptr) != LINE_SEPARATOR) { while (address < end && UNSAFE.getByte(address++) != LINE_SEPARATOR) {
ptr--;
} }
address = ptr + 1;
result.add(address); result.add(address);
} }
return result; return result;
@ -114,41 +110,41 @@ public class CalculateAverage_zerninv {
private static Map<String, MeasurementAggregation> calcForChunk(long offset, long end) { private static Map<String, MeasurementAggregation> calcForChunk(long offset, long end) {
var results = new MeasurementContainer(); var results = new MeasurementContainer();
long cityOffset, temperatureOffset; long cityOffset;
int hashCode, temperature; int hashCode, temperature, multiplier;
byte cityNameSize, b; byte cityNameSize, b;
while (offset < end) { while (offset < end) {
cityOffset = offset; cityOffset = offset;
hashCode = 0; hashCode = 0;
while ((b = UNSAFE.getByte(offset++)) != DELIMITER) { while ((b = UNSAFE.getByte(offset++)) != DELIMITER) {
hashCode = 31 * hashCode + b; hashCode = hashCode * 31 + b;
} }
cityNameSize = (byte) (offset - cityOffset - 1);
temperatureOffset = offset; multiplier = 1;
cityNameSize = (byte) (temperatureOffset - cityOffset - 1); temperature = UNSAFE.getByte(offset++) - ZERO;
if (temperature == MINUS - ZERO) {
multiplier = -1;
temperature = 0; temperature = 0;
}
while ((b = UNSAFE.getByte(offset++)) != LINE_SEPARATOR) { while ((b = UNSAFE.getByte(offset++)) != LINE_SEPARATOR) {
if (b >= ZERO && b <= NINE) { if (b >= ZERO && b <= NINE) {
temperature = temperature * 10 + (b - ZERO); temperature = temperature * 10 + (b - ZERO);
} }
} }
if (UNSAFE.getByte(temperatureOffset) == MINUS) { results.put(cityOffset, cityNameSize, hashCode, (short) (temperature * multiplier));
temperature *= -1;
}
results.put(cityOffset, cityNameSize, hashCode, (short) temperature);
} }
return results.toStringMap(); return results.toStringMap();
} }
private static final class MeasurementAggregation { private static final class MeasurementAggregation {
private long sum; private long sum;
private long count; private int count;
private short min; private short min;
private short max; private short max;
public MeasurementAggregation(long sum, long count, short min, short max) { public MeasurementAggregation(long sum, int count, short min, short max) {
this.sum = sum; this.sum = sum;
this.count = count; this.count = count;
this.min = min; this.min = min;
@ -174,14 +170,14 @@ public class CalculateAverage_zerninv {
private static final class MeasurementContainer { private static final class MeasurementContainer {
private static final int SIZE = 1024 * 16; private static final int SIZE = 1024 * 16;
private static final int ENTRY_SIZE = 8 + 1 + 4 + 8 + 8 + 2 + 2; private static final int ENTRY_SIZE = 4 + 4 + 1 + 8 + 8 + 2 + 2;
private static final int COUNT_OFFSET = 0; private static final int COUNT_OFFSET = 0;
private static final int HASH_OFFSET = 4;
private static final int SIZE_OFFSET = 8; private static final int SIZE_OFFSET = 8;
private static final int HASH_OFFSET = 9; private static final int ADDRESS_OFFSET = 9;
private static final int ADDRESS_OFFSET = 13; private static final int SUM_OFFSET = 17;
private static final int SUM_OFFSET = 21; private static final int MIN_OFFSET = 25;
private static final int MIN_OFFSET = 29; private static final int MAX_OFFSET = 27;
private static final int MAX_OFFSET = 31;
private final long address; private final long address;
@ -195,11 +191,23 @@ public class CalculateAverage_zerninv {
} }
public void put(long address, byte size, int hash, short value) { public void put(long address, byte size, int hash, short value) {
long ptr = findAddress(address, size, hash); int idx = Math.abs(hash % SIZE);
long ptr = this.address + idx * ENTRY_SIZE;
int count;
UNSAFE.putLong(ptr + COUNT_OFFSET, UNSAFE.getLong(ptr + COUNT_OFFSET) + 1); while ((count = UNSAFE.getInt(ptr + COUNT_OFFSET)) != 0) {
UNSAFE.putByte(ptr + SIZE_OFFSET, size); if (UNSAFE.getInt(ptr + HASH_OFFSET) == hash
&& UNSAFE.getByte(ptr + SIZE_OFFSET) == size
&& isEqual(UNSAFE.getLong(ptr + ADDRESS_OFFSET), address, size)) {
break;
}
idx = (idx + 1) % SIZE;
ptr = this.address + idx * ENTRY_SIZE;
}
UNSAFE.putInt(ptr + COUNT_OFFSET, count + 1);
UNSAFE.putInt(ptr + HASH_OFFSET, hash); UNSAFE.putInt(ptr + HASH_OFFSET, hash);
UNSAFE.putByte(ptr + SIZE_OFFSET, size);
UNSAFE.putLong(ptr + ADDRESS_OFFSET, address); UNSAFE.putLong(ptr + ADDRESS_OFFSET, address);
UNSAFE.putLong(ptr + SUM_OFFSET, UNSAFE.getLong(ptr + SUM_OFFSET) + value); UNSAFE.putLong(ptr + SUM_OFFSET, UNSAFE.getLong(ptr + SUM_OFFSET) + value);
@ -213,12 +221,14 @@ public class CalculateAverage_zerninv {
public Map<String, MeasurementAggregation> toStringMap() { public Map<String, MeasurementAggregation> toStringMap() {
var result = new HashMap<String, MeasurementAggregation>(); var result = new HashMap<String, MeasurementAggregation>();
int count;
for (int i = 0; i < SIZE; i++) { for (int i = 0; i < SIZE; i++) {
long ptr = this.address + i * ENTRY_SIZE; long ptr = this.address + i * ENTRY_SIZE;
if (UNSAFE.getLong(ptr + COUNT_OFFSET) != 0) { count = UNSAFE.getInt(ptr + COUNT_OFFSET);
if (count != 0) {
var measurements = new MeasurementAggregation( var measurements = new MeasurementAggregation(
UNSAFE.getLong(ptr + SUM_OFFSET), UNSAFE.getLong(ptr + SUM_OFFSET),
UNSAFE.getLong(ptr + COUNT_OFFSET), count,
UNSAFE.getShort(ptr + MIN_OFFSET), UNSAFE.getShort(ptr + MIN_OFFSET),
UNSAFE.getShort(ptr + MAX_OFFSET)); UNSAFE.getShort(ptr + MAX_OFFSET));
var key = createString(UNSAFE.getLong(ptr + ADDRESS_OFFSET), UNSAFE.getByte(ptr + SIZE_OFFSET)); var key = createString(UNSAFE.getLong(ptr + ADDRESS_OFFSET), UNSAFE.getByte(ptr + SIZE_OFFSET));
@ -228,21 +238,6 @@ public class CalculateAverage_zerninv {
return result; return result;
} }
private long findAddress(long address, byte size, int hash) {
int idx = Math.abs(hash % SIZE);
long ptr = this.address + idx * ENTRY_SIZE;
while (UNSAFE.getLong(ptr + COUNT_OFFSET) != 0) {
if (UNSAFE.getByte(ptr + SIZE_OFFSET) == size
&& UNSAFE.getInt(ptr + HASH_OFFSET) == hash
&& isEqual(UNSAFE.getLong(ptr + ADDRESS_OFFSET), address, size)) {
break;
}
idx = (idx + 1) % SIZE;
ptr = this.address + idx * ENTRY_SIZE;
}
return ptr;
}
private boolean isEqual(long address, long address2, byte size) { private boolean isEqual(long address, long address2, byte size) {
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
if (UNSAFE.getByte(address + i) != UNSAFE.getByte(address2 + i)) { if (UNSAFE.getByte(address + i) != UNSAFE.getByte(address2 + i)) {