Updating Sam Pullara's entry

This commit is contained in:
Sam Pullara 2024-01-04 10:14:06 -08:00 committed by GitHub
parent c1954f6a3f
commit 4af3253d53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 147 additions and 166 deletions

View File

@ -17,5 +17,6 @@
JAVA_OPTS="" JAVA_OPTS=""
sdk use java 21.0.1-graal
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_spullara time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_spullara

View File

@ -27,13 +27,10 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Collectors; import java.util.stream.Collectors;
public class CalculateAverage_spullara { public class CalculateAverage_spullara {
private static final String FILE = "./measurements.txt"; private static final String FILE = "./measurements.txt";
/* /*
* My results on this computer: * My results on this computer:
@ -44,189 +41,172 @@ public class CalculateAverage_spullara {
* *
*/ */
public static void main(String[] args) throws IOException, ExecutionException, InterruptedException { public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
var filename = args.length == 0 ? FILE : args[0]; long start = System.currentTimeMillis();
var file = new File(filename); var filename = args.length == 0 ? FILE : args[0];
long start = System.currentTimeMillis(); var file = new File(filename);
var totalLines = new AtomicInteger(); var resultsMap = getFileSegments(file).stream().map(segment -> {
var results = getFileSegments(file).stream().map(segment -> { var resultMap = new ByteArrayToResultMap();
var resultMap = new ByteArrayToResultMap(); long segmentEnd = segment.end();
long segmentEnd = segment.end(); try (var fileChannel = (FileChannel) Files.newByteChannel(Path.of(filename), StandardOpenOption.READ)) {
try (var fileChannel = (FileChannel) Files.newByteChannel(Path.of(filename), StandardOpenOption.READ)) { var bb = fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segmentEnd - segment.start());
var bb = fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segmentEnd - segment.start()); // Up to 100 characters for a city name
var buffer = new byte[64]; var buffer = new byte[100];
int lines = 0; int startLine;
int startLine; int limit = bb.limit();
int limit = bb.limit(); while ((startLine = bb.position()) < limit) {
while ((startLine = bb.position()) < limit) { int currentPosition = startLine;
int currentPosition = startLine; byte b;
byte b; int offset = 0;
int offset = 0; int hash = 0;
while (currentPosition != segmentEnd && (b = bb.get(currentPosition++)) != ';') { while (currentPosition != segmentEnd && (b = bb.get(currentPosition++)) != ';') {
buffer[offset++] = b; buffer[offset++] = b;
} hash = 31 * hash + b;
int temp = 0; }
int negative = 1; int temp;
outer: int negative = 1;
while (currentPosition != segmentEnd && (b = bb.get(currentPosition++)) != '\n') { // Inspired by @yemreinci to unroll this even further
switch (b) { if (bb.get(currentPosition) == '-') {
case '-': negative = -1;
negative = -1; currentPosition++;
case '.': }
break; if (bb.get(currentPosition + 1) == '.') {
case '\r': temp = negative * ((bb.get(currentPosition) - '0') * 10 + (bb.get(currentPosition + 2) - '0'));
currentPosition++; currentPosition += 3;
break outer; }
default: else {
temp = 10 * temp + (b - '0'); temp = negative * ((bb.get(currentPosition) - '0') * 100 + ((bb.get(currentPosition + 1) - '0') * 10 + (bb.get(currentPosition + 3) - '0')));
currentPosition += 4;
}
if (bb.get(currentPosition) == '\r') {
currentPosition++;
}
currentPosition++;
resultMap.putOrMerge(buffer, 0, offset, temp / 10.0, hash);
bb.position(currentPosition);
}
return resultMap;
} }
} catch (IOException e) {
temp *= negative; throw new RuntimeException(e);
double finalTemp = temp / 10.0; }
resultMap.putOrMerge(buffer, 0, offset, }).parallel().flatMap(partition -> partition.getAll().stream())
() -> new Result(finalTemp), .collect(Collectors.toMap(e -> new String(e.key()), Entry::value, CalculateAverage_spullara::merge, TreeMap::new));
measurement -> merge(measurement, finalTemp, finalTemp, finalTemp, 1));
lines++; System.out.println(resultsMap);
bb.position(currentPosition); }
private static List<FileSegment> getFileSegments(File file) throws IOException {
int numberOfSegments = Runtime.getRuntime().availableProcessors();
long fileSize = file.length();
long segmentSize = fileSize / numberOfSegments;
List<FileSegment> segments = new ArrayList<>(numberOfSegments);
// Pointless to split small files
if (segmentSize < 1_000_000) {
segments.add(new FileSegment(0, fileSize));
return segments;
} }
totalLines.addAndGet(lines); try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
return resultMap; for (int i = 0; i < numberOfSegments; i++) {
} catch (IOException e) { long segStart = i * segmentSize;
throw new RuntimeException(e); long segEnd = (i == numberOfSegments - 1) ? fileSize : segStart + segmentSize;
} segStart = findSegment(i, 0, randomAccessFile, segStart, segEnd);
}).parallel().toList(); segEnd = findSegment(i, numberOfSegments - 1, randomAccessFile, segEnd, fileSize);
var resultMap = results.stream() segments.add(new FileSegment(segStart, segEnd));
.flatMap(partition -> partition.getAll().stream()) }
.collect(Collectors.toMap(e -> new String(e.key()), Entry::value, CalculateAverage_spullara::merge, TreeMap::new)); }
return segments;
System.out.println("Time: " + (System.currentTimeMillis() - start) + "ms");
System.out.println("Lines processed: " + totalLines);
System.out.println(resultMap);
}
private static List<FileSegment> getFileSegments(File file) throws IOException {
int numberOfSegments = Runtime.getRuntime().availableProcessors();
long fileSize = file.length();
long segmentSize = fileSize / numberOfSegments;
List<FileSegment> segments = new ArrayList<>();
try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
for (int i = 0; i < numberOfSegments; i++) {
long segStart = i * segmentSize;
long segEnd = (i == numberOfSegments - 1) ? fileSize : segStart + segmentSize;
segStart = findSegment(i, 0, randomAccessFile, segStart, segEnd);
segEnd = findSegment(i, numberOfSegments - 1, randomAccessFile, segEnd, fileSize);
segments.add(new FileSegment(segStart, segEnd));
}
} }
return segments;
}
private static Result merge(Result v, Result value) { private static Result merge(Result v, Result value) {
return merge(v, value.min, value.max, value.sum, value.count); return merge(v, value.min, value.max, value.sum, value.count);
} }
private static Result merge(Result v, double value, double value1, double value2, long value3) { private static Result merge(Result v, double value, double value1, double value2, long value3) {
v.min = Math.min(v.min, value); v.min = Math.min(v.min, value);
v.max = Math.max(v.max, value1); v.max = Math.max(v.max, value1);
v.sum += value2; v.sum += value2;
v.count += value3; v.count += value3;
return v; return v;
} }
private static long findSegment(int i, int skipSegment, RandomAccessFile raf, long location, long fileSize) throws IOException { private static long findSegment(int i, int skipSegment, RandomAccessFile raf, long location, long fileSize) throws IOException {
if (i != skipSegment) { if (i != skipSegment) {
raf.seek(location); raf.seek(location);
while (location < fileSize) { while (location < fileSize) {
location++; location++;
if (raf.read() == '\n') if (raf.read() == '\n')
break; break;
} }
}
return location;
} }
return location;
}
} }
class Result { class Result {
double min, max, sum; double min, max, sum;
long count; long count;
Result(double value) { Result(double value) {
min = max = sum = value; min = max = sum = value;
this.count = 1; this.count = 1;
} }
@Override @Override
public String toString() { public String toString() {
return round(min) + "/" + round(sum / count) + "/" + round(max); return round(min) + "/" + round(sum / count) + "/" + round(max);
} }
double round(double v) { double round(double v) {
return Math.round(v * 10.0) / 10.0; return Math.round(v * 10.0) / 10.0;
} }
} }
record Pair(int slot, Result slotValue) { record Entry(byte[] key, Result value) {
} }
record Entry(byte[] key, Result value) { record FileSegment(long start, long end) {
} }
record FileSegment(long start, long end) {
}
class ByteArrayToResultMap { class ByteArrayToResultMap {
public static final int MAPSIZE = 1024*128; public static final int MAPSIZE = 1024 * 128;
Result[] slots = new Result[MAPSIZE]; Result[] slots = new Result[MAPSIZE];
byte[][] keys = new byte[MAPSIZE][]; byte[][] keys = new byte[MAPSIZE][];
private int hashCode(byte[] a, int fromIndex, int length) { public void putOrMerge(byte[] key, int offset, int size, double temp, int hash) {
int result = 0; int slot = hash & (slots.length - 1);
int end = fromIndex + length; var slotValue = slots[slot];
for (int i = fromIndex; i < end; i++) { // Linear probe for open slot
result = 31 * result + a[i]; while (slotValue != null && (keys[slot].length != size || !Arrays.equals(keys[slot], 0, size, key, offset, size))) {
slot = (slot + 1) & (slots.length - 1);
slotValue = slots[slot];
}
Result value = slotValue;
if (value == null) {
slots[slot] = new Result(temp);
byte[] bytes = new byte[size];
System.arraycopy(key, offset, bytes, 0, size);
keys[slot] = bytes;
} else {
value.min = Math.min(value.min, temp);
value.max = Math.max(value.max, temp);
value.sum += temp;
value.count += 1;
}
} }
return result;
}
private Pair getPair(byte[] key, int offset, int size) { // Get all pairs
int hash = hashCode(key, offset, size); public List<Entry> getAll() {
int slot = hash & (slots.length - 1); List<Entry> result = new ArrayList<>(slots.length);
var slotValue = slots[slot]; for (int i = 0; i < slots.length; i++) {
// Linear probe for open slot Result slotValue = slots[i];
while (slotValue != null && (keys[slot].length != size || !Arrays.equals(keys[slot], 0, size, key, offset, size))) { if (slotValue != null) {
slot = (slot + 1) & (slots.length - 1); result.add(new Entry(keys[i], slotValue));
slotValue = slots[slot]; }
}
return result;
} }
return new Pair(slot, slotValue);
}
public void putOrMerge(byte[] key, int offset, int size, Supplier<Result> supplier, Consumer<Result> merge) {
Pair result = getPair(key, offset, size);
Result value = result.slotValue();
if (value == null) {
int slot = result.slot();
slots[slot] = supplier.get();
byte[] bytes = new byte[size];
System.arraycopy(key, offset, bytes, 0, size);
keys[slot] = bytes;
} else {
merge.accept(value);
}
}
// Get all pairs
public List<Entry> getAll() {
List<Entry> result = new ArrayList<>();
for (int i = 0; i < slots.length; i++) {
Result slotValue = slots[i];
if (slotValue != null) {
result.add(new Entry(keys[i], slotValue));
}
}
return result;
}
} }