rby: Has some interesting optimisations but could be improved further with a custom hash map
* rby: Could be improved with a custom hashmap * Flag not needed * Fixes the tests when running ./test.sh rby
This commit is contained in:
parent
d617039d10
commit
e8a3011aca
20
calculate_average_rby.sh
Executable file
20
calculate_average_rby.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
# Extra JVM options for this entry; left empty, so the JVM runs with its defaults.
JAVA_OPTS=""
|
||||
# Run the rby entry from the built jar and report the elapsed time via `time`.
# $JAVA_OPTS is deliberately unquoted so that multiple options split into separate arguments.
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_rby
|
223
src/main/java/dev/morling/onebrc/CalculateAverage_rby.java
Normal file
223
src/main/java/dev/morling/onebrc/CalculateAverage_rby.java
Normal file
@ -0,0 +1,223 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.*;
|
||||
import java.nio.channels.*;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
import java.util.stream.*;
|
||||
|
||||
/**
 * Computes min/mean/max temperature per city from a file of {@code city;temperature} lines.
 *
 * <p>The file is split into line-aligned partitions (one per available processor), each
 * partition is aggregated independently, and the partial results are merged and printed.
 * Temperatures are handled as integers in tenths of a degree (the decimal point is dropped
 * while parsing).
 */
public class CalculateAverage_rby {

    private static final String FILE = "./measurements.txt";

    /** Upper bound, in bytes, of the direct buffer each partition reads into. */
    private static final int CHUNK_SIZE = 32 << 20;

    /** Initial number of city slots allocated per partition; the slot array grows on demand. */
    static final int MAX_CITIES = 1000;

    /** Size of the heap array the direct buffer is drained through. */
    static final int ARRAY_SIZE = 1 << 20;

    /** Each city owns this many consecutive ints in a stats array: min, max, sum, count. */
    private static final int FIELDS = 4;
    private static final int MIN = 0;
    private static final int MAX = 1;
    private static final int SUM = 2;
    private static final int COUNT = 3;

    /**
     * Computes good enough partitions that start and end on line boundaries.
     *
     * <p>The result has {@code workers + 1} entries; partition {@code i} covers the bytes
     * {@code [result[i], result[i + 1])}. The first entry is 0, the last is the file size, and
     * every interior entry is the offset of the byte right after a {@code '\n'}, so no partition
     * starts or ends in the middle of a line. Files smaller than 10000 bytes (or a worker count
     * of one or less) yield the single partition {@code {0, size}}.
     *
     * @param p       file to partition
     * @param workers number of partitions wanted
     * @return non-decreasing partition boundaries
     * @throws IOException if the file cannot be opened or read
     */
    static long[] cuts(Path p, int workers) throws IOException {
        try (var channel = FileChannel.open(p, StandardOpenOption.READ)) {
            final long size = channel.size();
            if (size < 10000L || workers <= 1) {
                return new long[]{ 0L, size };
            }

            final long chunk = size / workers;
            final long[] bounds = new long[workers + 1];
            bounds[0] = 0L;
            bounds[workers] = size;

            // 1024 bytes is normally enough to reach the next newline; nextLineStart keeps
            // scanning further windows if it is not.
            final var window = ByteBuffer.allocateDirect(1024);
            for (int i = 1; i < workers; i++) {
                // Never search before the previous boundary so the result stays ordered.
                final long target = Math.max(size - (long) (workers - i) * chunk, bounds[i - 1]);
                bounds[i] = nextLineStart(channel, window, target, size);
            }
            return bounds;
        }
    }

    /**
     * Returns the offset just past the first {@code '\n'} found at or after {@code from},
     * or {@code size} when the rest of the file contains no newline.
     */
    private static long nextLineStart(FileChannel channel, ByteBuffer window, long from, long size)
            throws IOException {
        long pos = from;
        while (pos < size) {
            window.clear();
            final int read = channel.read(window, pos);
            if (read <= 0) {
                break;
            }
            for (int i = 0; i < read; i++) {
                if (window.get(i) == '\n') {
                    return pos + i + 1;
                }
            }
            pos += read;
        }
        return size;
    }

    public static void main(String[] args) throws IOException {
        var p = Paths.get(FILE);
        var cpus = Runtime.getRuntime().availableProcessors();
        final long[] cuts = cuts(p, cpus);

        var stats = IntStream.range(0, cuts.length - 1)
                .parallel()
                .mapToObj(i -> stats(p, cuts[i], cuts[i + 1]))
                .reduce(Stats.IDENTITY, Stats::combine);

        stats.print();
    }

    /**
     * Aggregated measurements.
     *
     * <p>{@code indexes} maps a city name to its slot number; slot {@code n} occupies
     * {@code stats[4n .. 4n+3]} holding min, max, sum and count, all in tenths of a degree
     * (count excepted). {@code nextIx} is the number of slots in use.
     *
     * <p>NOTE(review): sums are plain {@code int}s, so a city whose total exceeds the int range
     * would overflow silently; widening would change the record's component type.
     */
    record Stats(Map<String, Integer> indexes, int nextIx, int[] stats) {

        private static final Stats IDENTITY = new Stats(new HashMap<>(), 0, new int[0]);

        /**
         * Merges {@code other} into a copy of this result; neither operand is modified.
         *
         * <p>Cities that only {@code other} knows are registered under a fresh slot so they
         * show up in the merged index, and the slot array is grown when it runs out of room.
         *
         * @param other partial result to merge in
         * @return the merged result ({@code other} or {@code this} unchanged when one side is
         *         the identity)
         */
        Stats combine(Stats other) {
            if (this == IDENTITY) {
                return other;
            }
            if (other == IDENTITY) {
                return this;
            }

            final var mergedIndexes = new HashMap<>(indexes);
            int[] merged = stats.clone();
            int mergedNextIx = nextIx;

            for (var entry : other.indexes.entrySet()) {
                Integer slot = mergedIndexes.get(entry.getKey());
                if (slot == null) {
                    slot = mergedNextIx++;
                    mergedIndexes.put(entry.getKey(), slot);
                    final int needed = (slot + 1) * FIELDS;
                    if (needed > merged.length) {
                        merged = withCapacity(merged, Math.max(merged.length * 2, needed));
                    }
                }
                final int ix = slot * FIELDS;
                final int oix = entry.getValue() * FIELDS;
                merged[ix + MIN] = Math.min(merged[ix + MIN], other.stats[oix + MIN]);
                merged[ix + MAX] = Math.max(merged[ix + MAX], other.stats[oix + MAX]);
                merged[ix + SUM] += other.stats[oix + SUM];
                merged[ix + COUNT] += other.stats[oix + COUNT];
            }
            return new Stats(mergedIndexes, mergedNextIx, merged);
        }

        /**
         * Prints {@code {city=min/mean/max, ...}} to standard output, cities in natural
         * {@code String} order and values converted back from tenths to degrees.
         */
        void print() {
            final var line = new StringJoiner(", ", "{", "}");
            for (var entry : new TreeMap<>(indexes).entrySet()) {
                final int ix = entry.getValue() * FIELDS;
                // Round the mean while still in tenths, then scale to degrees.
                final double avg = Math.round(stats[ix + SUM] / ((double) stats[ix + COUNT])) / 10.0;
                line.add(entry.getKey() + "="
                        + (stats[ix + MIN] / 10.0) + "/"
                        + avg + "/"
                        + (stats[ix + MAX] / 10.0));
            }
            System.out.println(line);
        }
    }

    /**
     * Aggregates the lines stored in the byte range {@code [start, end)} of {@code p}.
     *
     * <p>Only bytes inside the range are parsed. The range is expected to start at the
     * beginning of a line (as produced by {@link #cuts}); a final record that is not terminated
     * by {@code '\n'} before the range ends is still counted.
     *
     * @param p     file to read
     * @param start offset of the first byte to parse
     * @param end   offset one past the last byte to parse
     * @return per-city min/max/sum/count for the range
     * @throws UncheckedIOException if the file cannot be opened or read
     * @throws NumberFormatException if a temperature is not an optionally signed decimal number
     */
    static Stats stats(Path p, long start, long end) {
        final var acc = new Accumulator();

        try (var channel = FileChannel.open(p, StandardOpenOption.READ)) {
            channel.position(start);
            long remaining = end - start;

            final var buffer = ByteBuffer.allocateDirect(
                    (int) Math.max(1L, Math.min((long) CHUNK_SIZE, remaining)));
            final byte[] array = new byte[ARRAY_SIZE];

            // Bytes of the token being read: a city name, or a temperature without its '.'.
            // 128 bytes is assumed to be enough for either.
            final byte[] token = new byte[128];
            int tokenLen = 0;
            boolean readingCity = true;
            int cityIndex = 0;

            while (remaining > 0) {
                buffer.clear();
                if (remaining < buffer.capacity()) {
                    // Never read past the end of this partition.
                    buffer.limit((int) remaining);
                }
                final int limit = channel.read(buffer);
                if (limit <= 0) {
                    break;
                }
                remaining -= limit;
                buffer.flip();

                int drained = 0;
                while (drained < limit) {
                    final int read = Math.min(array.length, limit - drained);
                    buffer.get(array, 0, read);
                    drained += read;

                    for (int i = 0; i < read; i++) {
                        final byte b = array[i];
                        if (readingCity) {
                            if (b == ';') {
                                cityIndex = acc.indexOf(new String(token, 0, tokenLen,
                                        java.nio.charset.StandardCharsets.UTF_8));
                                tokenLen = 0;
                                readingCity = false;
                            }
                            else {
                                token[tokenLen++] = b;
                            }
                        }
                        else if (b == '\n') {
                            acc.add(cityIndex, parseTenths(token, tokenLen));
                            tokenLen = 0;
                            readingCity = true;
                        }
                        else if (b != '.') {
                            // Dropping the '.' turns "12.3" into the integer 123 (tenths).
                            token[tokenLen++] = b;
                        }
                    }
                }
            }

            if (!readingCity && tokenLen > 0) {
                // Last line of the range had no trailing newline.
                acc.add(cityIndex, parseTenths(token, tokenLen));
            }
            return acc.toStats();
        }
        catch (IOException err) {
            throw new UncheckedIOException(err);
        }
    }

    /** Parses an optionally negative run of ASCII digits held in {@code digits[0..len)}. */
    private static int parseTenths(byte[] digits, int len) {
        final boolean negative = len > 0 && digits[0] == '-';
        int i = negative ? 1 : 0;
        if (i == len) {
            throw new NumberFormatException("Empty temperature");
        }
        int value = 0;
        for (; i < len; i++) {
            final int digit = digits[i] - '0';
            if (digit < 0 || digit > 9) {
                throw new NumberFormatException("Invalid temperature: "
                        + new String(digits, 0, len, java.nio.charset.StandardCharsets.UTF_8));
            }
            value = value * 10 + digit;
        }
        return negative ? -value : value;
    }

    /**
     * Returns a copy of {@code stats} that is {@code length} ints long, with the min/max of
     * every added slot preset so that the first measurement always replaces them.
     */
    private static int[] withCapacity(int[] stats, int length) {
        final int[] grown = Arrays.copyOf(stats, length);
        for (int j = stats.length; j + MAX < length; j += FIELDS) {
            grown[j + MIN] = Integer.MAX_VALUE;
            grown[j + MAX] = Integer.MIN_VALUE;
        }
        return grown;
    }

    /** Mutable per-partition state: city-to-slot index plus the flat min/max/sum/count array. */
    private static final class Accumulator {
        private final Map<String, Integer> indexes = new HashMap<>(MAX_CITIES, 1.0f);
        private int[] stats = withCapacity(new int[0], MAX_CITIES * FIELDS);
        private int nextIx = 0;

        /** Returns the slot of {@code city}, registering it (and growing the array) if new. */
        int indexOf(String city) {
            final Integer known = indexes.get(city);
            if (known != null) {
                return known;
            }
            final int slot = nextIx++;
            indexes.put(city, slot);
            if (nextIx * FIELDS >= stats.length) {
                stats = withCapacity(stats, stats.length * 2);
            }
            return slot;
        }

        /** Folds one measurement (in tenths of a degree) into the given slot. */
        void add(int slot, int tenths) {
            final int ix = slot * FIELDS;
            if (tenths < stats[ix + MIN]) {
                stats[ix + MIN] = tenths;
            }
            if (tenths > stats[ix + MAX]) {
                stats[ix + MAX] = tenths;
            }
            stats[ix + SUM] += tenths;
            stats[ix + COUNT]++;
        }

        Stats toStats() {
            return new Stats(indexes, nextIx, stats);
        }
    }
}
|
Loading…
Reference in New Issue
Block a user