Improvement in CalculateAverage_yavuztas (#162)
* improve double reading by eliminating the intermediate string parsing; do the calculations on integers instead of doubles and convert to double only once at the end * more improvements: share a single StringBuilder to build all toString outputs, a minor performance gain * micro-optimizations on reading the temperature * a small skip to avoid redundant traversal, micro-optimization * micro-optimization: eliminate some if cases, saves 0.5 seconds more * micro-optimization: calculating the key hash ahead eliminates one more loop, saves 0.5 seconds more :) * optimize key equals and handle the case when a region is larger than the max integer size --------- Co-authored-by: Yavuz Tas <yavuz.tas@ing.com>
This commit is contained in:
parent
9879ff5034
commit
e8b2d2d7b4
@ -37,19 +37,29 @@ public class CalculateAverage_yavuztas {
|
|||||||
private static final Path FILE = Path.of("./measurements.txt");
|
private static final Path FILE = Path.of("./measurements.txt");
|
||||||
|
|
||||||
static class Measurement {
|
static class Measurement {
|
||||||
private double min;
|
|
||||||
private double max;
|
|
||||||
private double sum;
|
|
||||||
private int count = 1;
|
|
||||||
|
|
||||||
public Measurement(double initial) {
|
// Only accessed by a single thread, so it is safe to share
|
||||||
|
private static final StringBuilder STRING_BUILDER = new StringBuilder(14);
|
||||||
|
|
||||||
|
private int min; // calculations over int is faster than double, we convert to double in the end only once
|
||||||
|
private int max;
|
||||||
|
private long sum;
|
||||||
|
private long count = 1;
|
||||||
|
|
||||||
|
public Measurement(int initial) {
|
||||||
this.min = initial;
|
this.min = initial;
|
||||||
this.max = initial;
|
this.max = initial;
|
||||||
this.sum = initial;
|
this.sum = initial;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return round(this.min) + "/" + round(this.sum / this.count) + "/" + round(this.max);
|
STRING_BUILDER.setLength(0); // clear the builder to reuse
|
||||||
|
STRING_BUILDER.append(this.min / 10.0); // convert to double while generating the string output
|
||||||
|
STRING_BUILDER.append("/");
|
||||||
|
STRING_BUILDER.append(round((this.sum / 10.0) / this.count));
|
||||||
|
STRING_BUILDER.append("/");
|
||||||
|
STRING_BUILDER.append(this.max / 10.0);
|
||||||
|
return STRING_BUILDER.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private double round(double value) {
|
private double round(double value) {
|
||||||
@ -59,24 +69,23 @@ public class CalculateAverage_yavuztas {
|
|||||||
|
|
||||||
static class KeyBuffer {
|
static class KeyBuffer {
|
||||||
|
|
||||||
ByteBuffer value;
|
ByteBuffer buffer;
|
||||||
|
int length;
|
||||||
int hash;
|
int hash;
|
||||||
|
|
||||||
public KeyBuffer(ByteBuffer buffer) {
|
public KeyBuffer(ByteBuffer buffer, int length, int hash) {
|
||||||
this.value = buffer;
|
this.buffer = buffer;
|
||||||
this.hash = buffer.hashCode();
|
this.length = length;
|
||||||
|
this.hash = hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o) {
|
public boolean equals(Object o) {
|
||||||
if (this == o)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
final KeyBuffer keyBuffer = (KeyBuffer) o;
|
final KeyBuffer keyBuffer = (KeyBuffer) o;
|
||||||
if (o == null || getClass() != o.getClass() || this.hash != keyBuffer.hash)
|
if (this.length != keyBuffer.length || this.hash != keyBuffer.hash)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return this.value.equals(keyBuffer.value);
|
return this.buffer.equals(keyBuffer.buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -86,20 +95,14 @@ public class CalculateAverage_yavuztas {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final int limit = this.value.limit();
|
final byte[] bytes = new byte[this.length];
|
||||||
final byte[] bytes = new byte[limit];
|
this.buffer.get(bytes);
|
||||||
this.value.get(bytes);
|
return new String(bytes, 0, this.length, StandardCharsets.UTF_8);
|
||||||
return new String(bytes, 0, limit, StandardCharsets.UTF_8);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class FixedRegionDataAccessor {
|
static class FixedRegionDataAccessor {
|
||||||
|
|
||||||
static final byte SEMI_COLON = 59; // ';'
|
|
||||||
static final byte LINE_BREAK = 10; // '\n'
|
|
||||||
|
|
||||||
final byte[] workBuffer = new byte[256]; // assuming max 256 bytes for a row is enough
|
|
||||||
|
|
||||||
long startPos;
|
long startPos;
|
||||||
long size;
|
long size;
|
||||||
ByteBuffer buffer;
|
ByteBuffer buffer;
|
||||||
@ -111,30 +114,35 @@ public class CalculateAverage_yavuztas {
|
|||||||
this.buffer = buffer;
|
this.buffer = buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void traverse(BiConsumer<KeyBuffer, Double> consumer) {
|
void traverse(BiConsumer<KeyBuffer, Integer> consumer) {
|
||||||
|
int keyHash;
|
||||||
int semiColonPos = 0;
|
int length;
|
||||||
int lineBreakPos = 0;
|
|
||||||
while (this.buffer.hasRemaining()) {
|
while (this.buffer.hasRemaining()) {
|
||||||
|
|
||||||
while ((this.workBuffer[0] = this.buffer.get()) != LINE_BREAK) {
|
this.position = this.buffer.position(); // save line start pos
|
||||||
if (this.workBuffer[0] == SEMI_COLON) { // save semicolon pos
|
|
||||||
semiColonPos = this.buffer.position(); // semicolon exclusive
|
byte b;
|
||||||
}
|
keyHash = 0;
|
||||||
|
length = 0;
|
||||||
|
while ((b = this.buffer.get()) != ';') { // read until semicolon
|
||||||
|
keyHash = 31 * keyHash + b; // calculate key hash ahead, eleminates one more loop later
|
||||||
|
length++;
|
||||||
}
|
}
|
||||||
// found linebreak
|
|
||||||
lineBreakPos = this.buffer.position();
|
|
||||||
|
|
||||||
this.buffer.position(this.position); // set back to line start
|
final ByteBuffer station = this.buffer.slice(this.position, length);
|
||||||
final int length1 = semiColonPos - this.position; // station length
|
final KeyBuffer key = new KeyBuffer(station, length, keyHash);
|
||||||
final int length2 = lineBreakPos - semiColonPos; // temperature length
|
|
||||||
|
|
||||||
final ByteBuffer station = getRef(length1); // read station
|
this.buffer.mark(); // semicolon pos
|
||||||
final String temperature = readString(length2); // read temperature
|
skip(3); // skip more since minimum temperature length is 3
|
||||||
|
length = 4; // +1 for semicolon
|
||||||
|
|
||||||
this.position = lineBreakPos; // skip to line end
|
while (this.buffer.get() != '\n') {
|
||||||
|
length++; // read until linebreak
|
||||||
|
// TODO how to read temperature here
|
||||||
|
}
|
||||||
|
|
||||||
consumer.accept(new KeyBuffer(station), Double.parseDouble(temperature));
|
this.buffer.reset(); // set to after semicolon
|
||||||
|
consumer.accept(key, readTemperature(length));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,21 +165,40 @@ public class CalculateAverage_yavuztas {
|
|||||||
return initial;
|
return initial;
|
||||||
}
|
}
|
||||||
|
|
||||||
String readString(int length) {
|
// caching Math.pow calculation improves a lot!
|
||||||
this.buffer.get(this.workBuffer, 0, length);
|
// interestingly, instance field access is much faster than static field access
|
||||||
return new String(this.workBuffer, 0, length - 1, // strip the last char
|
final int[] powerOfTenCache = new int[]{ 1, 10, 100 };
|
||||||
StandardCharsets.UTF_8);
|
|
||||||
|
int readTemperature(int length) {
|
||||||
|
int temp = 0;
|
||||||
|
final byte b1 = this.buffer.get(); // get first byte
|
||||||
|
|
||||||
|
int digits = length - 4; // digit position
|
||||||
|
final boolean negative = b1 == '-';
|
||||||
|
if (!negative) {
|
||||||
|
temp += this.powerOfTenCache[digits + 1] * (b1 - 48); // add first digit ahead
|
||||||
|
}
|
||||||
|
|
||||||
|
byte b;
|
||||||
|
while ((b = this.buffer.get()) != '.') { // read until dot
|
||||||
|
temp += this.powerOfTenCache[digits--] * (b - 48);
|
||||||
|
}
|
||||||
|
b = this.buffer.get(); // read after dot, only one digit no loop
|
||||||
|
temp += this.powerOfTenCache[digits] * (b - 48);
|
||||||
|
this.buffer.get(); // skip line break
|
||||||
|
|
||||||
|
return (negative) ? -temp : temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
ByteBuffer getRef(int length) {
|
ByteBuffer getKeyRef(int length) {
|
||||||
final ByteBuffer slice = this.buffer.slice().limit(length - 1);
|
final ByteBuffer slice = this.buffer.slice().limit(length - 1);
|
||||||
skip(this.buffer, length);
|
skip(length);
|
||||||
return slice;
|
return slice;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void skip(ByteBuffer buffer, int length) {
|
void skip(int length) {
|
||||||
final int pos = buffer.position();
|
final int pos = this.buffer.position();
|
||||||
buffer.position(pos + length);
|
this.buffer.position(pos + length);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -187,11 +214,11 @@ public class CalculateAverage_yavuztas {
|
|||||||
final long fileSize = Files.size(path);
|
final long fileSize = Files.size(path);
|
||||||
long regionSize = fileSize / concurrency;
|
long regionSize = fileSize / concurrency;
|
||||||
|
|
||||||
if (regionSize > Integer.MAX_VALUE) {
|
|
||||||
// TODO multiply concurrency and try again
|
|
||||||
throw new IllegalArgumentException("Bigger than integer!");
|
|
||||||
}
|
|
||||||
// handling extreme cases
|
// handling extreme cases
|
||||||
|
while (regionSize > Integer.MAX_VALUE) {
|
||||||
|
concurrency *= 2;
|
||||||
|
regionSize = fileSize / concurrency;
|
||||||
|
}
|
||||||
if (regionSize <= 256) { // small file, no need concurrency
|
if (regionSize <= 256) { // small file, no need concurrency
|
||||||
concurrency = 1;
|
concurrency = 1;
|
||||||
regionSize = fileSize;
|
regionSize = fileSize;
|
||||||
@ -251,7 +278,7 @@ public class CalculateAverage_yavuztas {
|
|||||||
private static int findClosestLineEnd(int regionSize, ByteBuffer buffer) {
|
private static int findClosestLineEnd(int regionSize, ByteBuffer buffer) {
|
||||||
int position = regionSize;
|
int position = regionSize;
|
||||||
int left = regionSize;
|
int left = regionSize;
|
||||||
while (buffer.get(position) != FixedRegionDataAccessor.LINE_BREAK) {
|
while (buffer.get(position) != '\n') {
|
||||||
position = --left;
|
position = --left;
|
||||||
}
|
}
|
||||||
return position;
|
return position;
|
||||||
|
Loading…
Reference in New Issue
Block a user