Updating Sam Pullara's entry
This commit is contained in:
parent
c1954f6a3f
commit
4af3253d53
@ -17,5 +17,6 @@
|
|||||||
|
|
||||||
|
|
||||||
JAVA_OPTS=""
|
JAVA_OPTS=""
|
||||||
|
sdk use java 21.0.1-graal
|
||||||
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_spullara
|
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_spullara
|
||||||
|
|
||||||
|
@ -27,13 +27,10 @@ import java.util.Arrays;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
|
||||||
import java.util.function.Consumer;
|
|
||||||
import java.util.function.Supplier;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class CalculateAverage_spullara {
|
public class CalculateAverage_spullara {
|
||||||
private static final String FILE = "./measurements.txt";
|
private static final String FILE = "./measurements.txt";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* My results on this computer:
|
* My results on this computer:
|
||||||
@ -44,189 +41,172 @@ public class CalculateAverage_spullara {
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
|
public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
|
||||||
var filename = args.length == 0 ? FILE : args[0];
|
long start = System.currentTimeMillis();
|
||||||
var file = new File(filename);
|
var filename = args.length == 0 ? FILE : args[0];
|
||||||
long start = System.currentTimeMillis();
|
var file = new File(filename);
|
||||||
|
|
||||||
var totalLines = new AtomicInteger();
|
var resultsMap = getFileSegments(file).stream().map(segment -> {
|
||||||
var results = getFileSegments(file).stream().map(segment -> {
|
var resultMap = new ByteArrayToResultMap();
|
||||||
var resultMap = new ByteArrayToResultMap();
|
long segmentEnd = segment.end();
|
||||||
long segmentEnd = segment.end();
|
try (var fileChannel = (FileChannel) Files.newByteChannel(Path.of(filename), StandardOpenOption.READ)) {
|
||||||
try (var fileChannel = (FileChannel) Files.newByteChannel(Path.of(filename), StandardOpenOption.READ)) {
|
var bb = fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segmentEnd - segment.start());
|
||||||
var bb = fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segmentEnd - segment.start());
|
// Up to 100 characters for a city name
|
||||||
var buffer = new byte[64];
|
var buffer = new byte[100];
|
||||||
int lines = 0;
|
int startLine;
|
||||||
int startLine;
|
int limit = bb.limit();
|
||||||
int limit = bb.limit();
|
while ((startLine = bb.position()) < limit) {
|
||||||
while ((startLine = bb.position()) < limit) {
|
int currentPosition = startLine;
|
||||||
int currentPosition = startLine;
|
byte b;
|
||||||
byte b;
|
int offset = 0;
|
||||||
int offset = 0;
|
int hash = 0;
|
||||||
while (currentPosition != segmentEnd && (b = bb.get(currentPosition++)) != ';') {
|
while (currentPosition != segmentEnd && (b = bb.get(currentPosition++)) != ';') {
|
||||||
buffer[offset++] = b;
|
buffer[offset++] = b;
|
||||||
}
|
hash = 31 * hash + b;
|
||||||
int temp = 0;
|
}
|
||||||
int negative = 1;
|
int temp;
|
||||||
outer:
|
int negative = 1;
|
||||||
while (currentPosition != segmentEnd && (b = bb.get(currentPosition++)) != '\n') {
|
// Inspired by @yemreinci to unroll this even further
|
||||||
switch (b) {
|
if (bb.get(currentPosition) == '-') {
|
||||||
case '-':
|
negative = -1;
|
||||||
negative = -1;
|
currentPosition++;
|
||||||
case '.':
|
}
|
||||||
break;
|
if (bb.get(currentPosition + 1) == '.') {
|
||||||
case '\r':
|
temp = negative * ((bb.get(currentPosition) - '0') * 10 + (bb.get(currentPosition + 2) - '0'));
|
||||||
currentPosition++;
|
currentPosition += 3;
|
||||||
break outer;
|
}
|
||||||
default:
|
else {
|
||||||
temp = 10 * temp + (b - '0');
|
temp = negative * ((bb.get(currentPosition) - '0') * 100 + ((bb.get(currentPosition + 1) - '0') * 10 + (bb.get(currentPosition + 3) - '0')));
|
||||||
|
currentPosition += 4;
|
||||||
|
}
|
||||||
|
if (bb.get(currentPosition) == '\r') {
|
||||||
|
currentPosition++;
|
||||||
|
}
|
||||||
|
currentPosition++;
|
||||||
|
resultMap.putOrMerge(buffer, 0, offset, temp / 10.0, hash);
|
||||||
|
bb.position(currentPosition);
|
||||||
|
}
|
||||||
|
return resultMap;
|
||||||
}
|
}
|
||||||
}
|
catch (IOException e) {
|
||||||
temp *= negative;
|
throw new RuntimeException(e);
|
||||||
double finalTemp = temp / 10.0;
|
}
|
||||||
resultMap.putOrMerge(buffer, 0, offset,
|
}).parallel().flatMap(partition -> partition.getAll().stream())
|
||||||
() -> new Result(finalTemp),
|
.collect(Collectors.toMap(e -> new String(e.key()), Entry::value, CalculateAverage_spullara::merge, TreeMap::new));
|
||||||
measurement -> merge(measurement, finalTemp, finalTemp, finalTemp, 1));
|
|
||||||
lines++;
|
System.out.println(resultsMap);
|
||||||
bb.position(currentPosition);
|
}
|
||||||
|
|
||||||
|
private static List<FileSegment> getFileSegments(File file) throws IOException {
|
||||||
|
int numberOfSegments = Runtime.getRuntime().availableProcessors();
|
||||||
|
long fileSize = file.length();
|
||||||
|
long segmentSize = fileSize / numberOfSegments;
|
||||||
|
List<FileSegment> segments = new ArrayList<>(numberOfSegments);
|
||||||
|
// Pointless to split small files
|
||||||
|
if (segmentSize < 1_000_000) {
|
||||||
|
segments.add(new FileSegment(0, fileSize));
|
||||||
|
return segments;
|
||||||
}
|
}
|
||||||
totalLines.addAndGet(lines);
|
try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
|
||||||
return resultMap;
|
for (int i = 0; i < numberOfSegments; i++) {
|
||||||
} catch (IOException e) {
|
long segStart = i * segmentSize;
|
||||||
throw new RuntimeException(e);
|
long segEnd = (i == numberOfSegments - 1) ? fileSize : segStart + segmentSize;
|
||||||
}
|
segStart = findSegment(i, 0, randomAccessFile, segStart, segEnd);
|
||||||
}).parallel().toList();
|
segEnd = findSegment(i, numberOfSegments - 1, randomAccessFile, segEnd, fileSize);
|
||||||
|
|
||||||
var resultMap = results.stream()
|
segments.add(new FileSegment(segStart, segEnd));
|
||||||
.flatMap(partition -> partition.getAll().stream())
|
}
|
||||||
.collect(Collectors.toMap(e -> new String(e.key()), Entry::value, CalculateAverage_spullara::merge, TreeMap::new));
|
}
|
||||||
|
return segments;
|
||||||
System.out.println("Time: " + (System.currentTimeMillis() - start) + "ms");
|
|
||||||
System.out.println("Lines processed: " + totalLines);
|
|
||||||
System.out.println(resultMap);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<FileSegment> getFileSegments(File file) throws IOException {
|
|
||||||
int numberOfSegments = Runtime.getRuntime().availableProcessors();
|
|
||||||
long fileSize = file.length();
|
|
||||||
long segmentSize = fileSize / numberOfSegments;
|
|
||||||
List<FileSegment> segments = new ArrayList<>();
|
|
||||||
try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
|
|
||||||
for (int i = 0; i < numberOfSegments; i++) {
|
|
||||||
long segStart = i * segmentSize;
|
|
||||||
long segEnd = (i == numberOfSegments - 1) ? fileSize : segStart + segmentSize;
|
|
||||||
segStart = findSegment(i, 0, randomAccessFile, segStart, segEnd);
|
|
||||||
segEnd = findSegment(i, numberOfSegments - 1, randomAccessFile, segEnd, fileSize);
|
|
||||||
|
|
||||||
segments.add(new FileSegment(segStart, segEnd));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return segments;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Result merge(Result v, Result value) {
|
private static Result merge(Result v, Result value) {
|
||||||
return merge(v, value.min, value.max, value.sum, value.count);
|
return merge(v, value.min, value.max, value.sum, value.count);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Result merge(Result v, double value, double value1, double value2, long value3) {
|
private static Result merge(Result v, double value, double value1, double value2, long value3) {
|
||||||
v.min = Math.min(v.min, value);
|
v.min = Math.min(v.min, value);
|
||||||
v.max = Math.max(v.max, value1);
|
v.max = Math.max(v.max, value1);
|
||||||
v.sum += value2;
|
v.sum += value2;
|
||||||
v.count += value3;
|
v.count += value3;
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static long findSegment(int i, int skipSegment, RandomAccessFile raf, long location, long fileSize) throws IOException {
|
private static long findSegment(int i, int skipSegment, RandomAccessFile raf, long location, long fileSize) throws IOException {
|
||||||
if (i != skipSegment) {
|
if (i != skipSegment) {
|
||||||
raf.seek(location);
|
raf.seek(location);
|
||||||
while (location < fileSize) {
|
while (location < fileSize) {
|
||||||
location++;
|
location++;
|
||||||
if (raf.read() == '\n')
|
if (raf.read() == '\n')
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
return location;
|
||||||
}
|
}
|
||||||
return location;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class Result {
|
class Result {
|
||||||
double min, max, sum;
|
double min, max, sum;
|
||||||
long count;
|
long count;
|
||||||
|
|
||||||
Result(double value) {
|
Result(double value) {
|
||||||
min = max = sum = value;
|
min = max = sum = value;
|
||||||
this.count = 1;
|
this.count = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return round(min) + "/" + round(sum / count) + "/" + round(max);
|
return round(min) + "/" + round(sum / count) + "/" + round(max);
|
||||||
}
|
}
|
||||||
|
|
||||||
double round(double v) {
|
double round(double v) {
|
||||||
return Math.round(v * 10.0) / 10.0;
|
return Math.round(v * 10.0) / 10.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
record Pair(int slot, Result slotValue) {
|
record Entry(byte[] key, Result value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
record Entry(byte[] key, Result value) {
|
record FileSegment(long start, long end) {
|
||||||
}
|
}
|
||||||
|
|
||||||
record FileSegment(long start, long end) {
|
|
||||||
}
|
|
||||||
|
|
||||||
class ByteArrayToResultMap {
|
class ByteArrayToResultMap {
|
||||||
public static final int MAPSIZE = 1024*128;
|
public static final int MAPSIZE = 1024 * 128;
|
||||||
Result[] slots = new Result[MAPSIZE];
|
Result[] slots = new Result[MAPSIZE];
|
||||||
byte[][] keys = new byte[MAPSIZE][];
|
byte[][] keys = new byte[MAPSIZE][];
|
||||||
|
|
||||||
private int hashCode(byte[] a, int fromIndex, int length) {
|
public void putOrMerge(byte[] key, int offset, int size, double temp, int hash) {
|
||||||
int result = 0;
|
int slot = hash & (slots.length - 1);
|
||||||
int end = fromIndex + length;
|
var slotValue = slots[slot];
|
||||||
for (int i = fromIndex; i < end; i++) {
|
// Linear probe for open slot
|
||||||
result = 31 * result + a[i];
|
while (slotValue != null && (keys[slot].length != size || !Arrays.equals(keys[slot], 0, size, key, offset, size))) {
|
||||||
|
slot = (slot + 1) & (slots.length - 1);
|
||||||
|
slotValue = slots[slot];
|
||||||
|
}
|
||||||
|
Result value = slotValue;
|
||||||
|
if (value == null) {
|
||||||
|
slots[slot] = new Result(temp);
|
||||||
|
byte[] bytes = new byte[size];
|
||||||
|
System.arraycopy(key, offset, bytes, 0, size);
|
||||||
|
keys[slot] = bytes;
|
||||||
|
} else {
|
||||||
|
value.min = Math.min(value.min, temp);
|
||||||
|
value.max = Math.max(value.max, temp);
|
||||||
|
value.sum += temp;
|
||||||
|
value.count += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Pair getPair(byte[] key, int offset, int size) {
|
// Get all pairs
|
||||||
int hash = hashCode(key, offset, size);
|
public List<Entry> getAll() {
|
||||||
int slot = hash & (slots.length - 1);
|
List<Entry> result = new ArrayList<>(slots.length);
|
||||||
var slotValue = slots[slot];
|
for (int i = 0; i < slots.length; i++) {
|
||||||
// Linear probe for open slot
|
Result slotValue = slots[i];
|
||||||
while (slotValue != null && (keys[slot].length != size || !Arrays.equals(keys[slot], 0, size, key, offset, size))) {
|
if (slotValue != null) {
|
||||||
slot = (slot + 1) & (slots.length - 1);
|
result.add(new Entry(keys[i], slotValue));
|
||||||
slotValue = slots[slot];
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
return new Pair(slot, slotValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void putOrMerge(byte[] key, int offset, int size, Supplier<Result> supplier, Consumer<Result> merge) {
|
|
||||||
Pair result = getPair(key, offset, size);
|
|
||||||
Result value = result.slotValue();
|
|
||||||
if (value == null) {
|
|
||||||
int slot = result.slot();
|
|
||||||
slots[slot] = supplier.get();
|
|
||||||
byte[] bytes = new byte[size];
|
|
||||||
System.arraycopy(key, offset, bytes, 0, size);
|
|
||||||
keys[slot] = bytes;
|
|
||||||
} else {
|
|
||||||
merge.accept(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get all pairs
|
|
||||||
public List<Entry> getAll() {
|
|
||||||
List<Entry> result = new ArrayList<>();
|
|
||||||
for (int i = 0; i < slots.length; i++) {
|
|
||||||
Result slotValue = slots[i];
|
|
||||||
if (slotValue != null) {
|
|
||||||
result.add(new Entry(keys[i], slotValue));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user