New Fresh Solution to Optimize Execution time (#641)

* CalculateAverage_pdrakatos

* Rename to comply with the rules

* CalculateAverage_pdrakatos

* Rename to comply with the rules

* Changes on scripts execution

* Fix bugs that prevented the scripts from being executed

* Change prepare to make it compatible

* Fix so that all tests pass

* Increase direct memory allocation buffer

* Fix memory problem that causes a heap space exception

* Fresh solution to optimize performance of the execution
This commit is contained in:
Panagiotis Drakatos 2024-01-29 22:16:40 +02:00 committed by GitHub
parent 1281e77be4
commit 31a6740ef1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -15,8 +15,10 @@
*/ */
package dev.morling.onebrc; package dev.morling.onebrc;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer; import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
@ -26,18 +28,27 @@ import java.util.stream.Stream;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
public class CalculateAverage_PanagiotisDrakatos { public class CalculateAverage_PanagiotisDrakatos {
private static final String FILE = "./measurements.txt"; private static final String FILE = "./measurements.txt";
private static final long SEGMENT_SIZE = 4 * 1024 * 1024;
private static final long COMMA_PATTERN = 0x3B3B3B3B3B3B3B3BL;
private static final long DOT_BITS = 0x10101000;
private static final long MAGIC_MULTIPLIER = (100 * 0x1000000 + 10 * 0x10000 + 1);
private static TreeMap<String, MeasurementObject> sortedCities; private static TreeMap<String, MeasurementObject> sortedCities;
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
SeekableByteRead(FILE); SeekableByteRead(FILE);
System.out.println(sortedCities); System.out.println(sortedCities);
boolean DEBUG = true;
} }
private static void SeekableByteRead(String path) throws IOException { private static void SeekableByteRead(String path) throws IOException {
FileInputStream fileInputStream = new FileInputStream(FILE); FileInputStream fileInputStream = new FileInputStream(new File(FILE));
FileChannel fileChannel = fileInputStream.getChannel(); FileChannel fileChannel = fileInputStream.getChannel();
Optional<Map<String, MeasurementObject>> optimistic = SplitSeekableByteChannel(fileChannel) Optional<Map<String, MeasurementObject>> optimistic = getFileSegments(new File(FILE), fileChannel)
.stream()
.map(CalculateAverage_PanagiotisDrakatos::SplitSeekableByteChannel)
.parallel() .parallel()
.map(CalculateAverage_PanagiotisDrakatos::MappingByteBufferToData) .map(CalculateAverage_PanagiotisDrakatos::MappingByteBufferToData)
.reduce(CalculateAverage_PanagiotisDrakatos::combineMaps); .reduce(CalculateAverage_PanagiotisDrakatos::combineMaps);
@ -46,37 +57,53 @@ public class CalculateAverage_PanagiotisDrakatos {
} }
private static Stream<ByteBuffer> SplitSeekableByteChannel(FileChannel channel) throws IOException { record FileSegment(long start, long end, FileChannel fileChannel) {
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(new Iterator<ByteBuffer>() { }
private static final long MAP_SIZE = 1024 * 1024 * 10L;
private long position = 0; private static List<FileSegment> getFileSegments(final File file, final FileChannel fileChannel) throws IOException {
private long length = channel.size(); final int numberOfSegments = Runtime.getRuntime().availableProcessors();
final long fileSize = file.length();
@Override final long segmentSize = fileSize / numberOfSegments;
public boolean hasNext() { final List<FileSegment> segments = new ArrayList<>();
while (position < length) { if (segmentSize < 1000) {
return true; segments.add(new FileSegment(0, fileSize, fileChannel));
} return segments;
return false; }
try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
long segStart = 0;
long segEnd = segmentSize;
while (segStart < fileSize) {
segEnd = findSegment(randomAccessFile, segEnd, fileSize);
segments.add(new FileSegment(segStart, segEnd, fileChannel));
segStart = segEnd; // Just re-use the end and go from there.
segEnd = Math.min(fileSize, segEnd + segmentSize);
} }
}
return segments;
}
@Override private static long findSegment(RandomAccessFile raf, long location, final long fileSize) throws IOException {
public ByteBuffer next() { raf.seek(location);
try { while (location < fileSize) {
MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, position, Math.min(MAP_SIZE, length - position)); location++;
int end = buffer.limit() - 1; if (raf.read() == '\n')
while (buffer.get(end) != '\n') { return location;
end--; }
} return location;
position += end + 1; }
return buffer.slice(0, end);
} private static ByteBuffer SplitSeekableByteChannel(FileSegment segment) {
catch (IOException e) { try {
throw new RuntimeException(e); MappedByteBuffer buffer = segment.fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segment.end - segment.start());
} int end = buffer.limit() - 1;
while (buffer.get(end) != '\n') {
end--;
} }
}, Spliterator.IMMUTABLE), false); return buffer.slice(0, end);
}
catch (Exception ex) {
throw new RuntimeException(ex);
}
} }
public static ByteBuffer concat(ByteBuffer[] buffers) { public static ByteBuffer concat(ByteBuffer[] buffers) {