1brc submission by godofwharf (#658)
* 1brc submission by godofwharf
* Fix prepare script
* Modify shebang
* Fix formatting
* Remove unused FastHashMap implementation
This commit is contained in:
parent 540ef2c863
commit d1cdb8587c
calculate_average_godofwharf.sh (new executable file, 19 lines)
@@ -0,0 +1,19 @@
#!/bin/sh
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

JAVA_OPTS="--enable-preview --add-modules jdk.incubator.vector -DpageSize=262144 -XX:+UseParallelGC -Xms2600m -XX:ParallelGCThreads=8 -XX:Tier4CompileThreshold=1000 -XX:Tier3CompileThreshold=500 -XX:Tier3CompileThreshold=250 -Dthreads=9 -Djava.util.concurrent.ForkJoinPool.common.parallelism=9"
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_godofwharf 2>/dev/null
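Note that the -DpageSize and -Dthreads values in JAVA_OPTS are ordinary JVM system properties; the program reads them back at startup via Integer.parseInt(System.getProperty(...)) (see the PAGE_SIZE constant and compute() in CalculateAverage_godofwharf.java below), so they can be tuned per machine without rebuilding the jar.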
prepare_godofwharf.sh (new executable file, 19 lines)
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.2-tem 1>&2
CalculateAverage_godofwharf.java (new file, 588 lines)
@@ -0,0 +1,588 @@
package dev.morling.onebrc;

/*
 * Copyright 2023 The original authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorSpecies;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.lang.management.ManagementFactory;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.BiConsumer;
import java.util.stream.IntStream;

import static java.nio.charset.StandardCharsets.UTF_8;

public class CalculateAverage_godofwharf {
    private static final String FILE = "./measurements.txt";
    private static final boolean DEBUG = Boolean.parseBoolean(System.getProperty("debug", "false"));
    private static final int NCPU = Runtime.getRuntime().availableProcessors();

    private static final VectorSpecies<Byte> PREFERRED_SPECIES = VectorSpecies.ofPreferred(byte.class);

    private static final Vector<Byte> NEW_LINE_VEC = PREFERRED_SPECIES.broadcast('\n');
    // This array is used for quick conversion of fractional part
    private static final double[] DOUBLES = new double[]{ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };
    // This array is used for quick conversion from ASCII to digit
    private static final int[] DIGIT_LOOKUP = new int[]{
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, 0, 1,
            2, 3, 4, 5, 6, 7, 8, 9, -1, -1 };
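    // (The lookup above is indexed by ASCII code: entries 48..57 map '0'..'9' to 0..9 and
    // everything else is -1, so NumberUtils.toDigit() below is a single array load.)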
    private static final int MAX_STR_LEN = 108;
    private static final int DEFAULT_HASH_TBL_SIZE = 4096;
    private static final int DEFAULT_PAGE_SIZE = 8_388_608; // 8 MB
    private static final int PAGE_SIZE = Integer.parseInt(System.getProperty("pageSize", STR."\{DEFAULT_PAGE_SIZE}"));

    public static void main(String[] args) throws Exception {
        long startTimeMs = System.currentTimeMillis();
        Map<String, MeasurementAggregator> measurements = compute();
        long time1 = System.nanoTime();
        System.out.println(measurements);
        printDebugMessage("Print took %d ns%n", (System.nanoTime() - time1));
        printDebugMessage("Took %d ms%n", System.currentTimeMillis() - startTimeMs);
        printDebugMessage("Time spent on GC=%d ms%n", ManagementFactory.getGarbageCollectorMXBeans().get(0).getCollectionTime());
        System.exit(0);
    }

    private static Map<String, MeasurementAggregator> compute() throws Exception {
        int nThreads = Integer.parseInt(
                System.getProperty("threads", STR."\{NCPU}"));
        printDebugMessage("Running program with %d threads %n", nThreads);
        Job job = new Job(nThreads - 1);
        job.compute(FILE);
        return job.sort();
    }

    public static class Job {
        private final int nThreads;
        private final State[] threadLocalStates;
        private final Map<String, MeasurementAggregator> globalMap = new ConcurrentHashMap<>(DEFAULT_HASH_TBL_SIZE);
        private final ExecutorService executorService;

        public Job(final int nThreads) {
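            // Note: entries are indexed later by Thread.currentThread().threadId() (see compute()),
            // so the array is sized to 16x the requested thread count, which leaves headroom for
            // thread ids that are larger than nThreads.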
            this.threadLocalStates = new State[(nThreads << 4)];
            IntStream.range(0, nThreads << 4)
                    .forEach(i -> threadLocalStates[i] = new State());
            this.nThreads = nThreads;
            this.executorService = Executors.newFixedThreadPool(nThreads);
        }

        public void compute(final String path) throws Exception {
            // Create a random access file so that we can map the contents of the file into native memory for faster access
            try (RandomAccessFile file = new RandomAccessFile(path, "r")) {
                // Create a memory segment for the entire file
                MemorySegment globalSegment = file.getChannel().map(
                        FileChannel.MapMode.READ_ONLY, 0, file.length(), Arena.global());
                long fileLength = file.length();
                // Ensure that the split length never exceeds Integer.MAX_VALUE. This is because ByteBuffers cannot
                // be larger than 2 GiB.
                int splitLength = (int) Math.min(Integer.MAX_VALUE, Math.max(PAGE_SIZE, Math.rint(fileLength * 1.0 / nThreads)));
                printDebugMessage("fileLength = %d, splitLength = %d%n", file.length(), splitLength);
                long time1 = System.nanoTime();
                // Break the file into multiple splits. One thread would process one split.
                // This routine makes sure that the splits are uniformly sized to the best extent possible.
                // Each split would either end with a '\n' character or EOF
                List<Split> splits = breakFileIntoSplits(file, splitLength, PAGE_SIZE, globalSegment, false);
                printDebugMessage("Number of splits = %d, splits = [%s]%n", splits.size(), splits);
                printDebugMessage("Splits calculation took %d ns%n", System.nanoTime() - time1);
                // consume splits in parallel using the common fork join pool
                long time = System.nanoTime();
                List<Future<?>> futures = new ArrayList<>(splits.size() * 2);
                splits
                        .forEach(split -> {
                            // process splits concurrently using a thread pool
                            futures.add(executorService.submit(() -> {
                                MemorySegment splitSegment = globalSegment.asSlice(split.offset, split.length);
                                splitSegment.load();
                                int tid = (int) Thread.currentThread().threadId();
                                byte[] currentPage = new byte[PAGE_SIZE + MAX_STR_LEN];
                                // iterate over each page in split
                                for (Page page : split.pages) {
                                    // this byte buffer should end with '\n' or EOF
                                    MemorySegment segment = globalSegment.asSlice(page.offset, page.length);
                                    MemorySegment.copy(segment, ValueLayout.JAVA_BYTE, 0L, currentPage, 0, (int) page.length);
                                    SearchResult searchResult = findNewLinesVectorized(currentPage, (int) page.length);
                                    int prevOffset = 0;
                                    int j = 0;
                                    // iterate over search results
                                    while (j < searchResult.len) {
                                        int curOffset = searchResult.offsets[j];
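                                        // The temperature always has one fractional digit, so it is 3-5 bytes long
                                        // ("9.9", "-9.9"/"99.9", "-99.9"). Its length follows from where ';' sits
                                        // relative to this newline: 4 bytes before -> 3, 5 before -> 4, otherwise 5.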
                                        byte ch1 = currentPage[curOffset - 4];
                                        byte ch2 = currentPage[curOffset - 5];
                                        int temperatureLen = 5;
                                        if (ch1 == ';') {
                                            temperatureLen = 3;
                                        }
                                        else if (ch2 == ';') {
                                            temperatureLen = 4;
                                        }
                                        int lineLength = curOffset - prevOffset;
                                        int stationLen = lineLength - temperatureLen - 1;
                                        byte[] station = new byte[stationLen];
                                        System.arraycopy(currentPage, prevOffset, station, 0, stationLen);
                                        int hashcode = Arrays.hashCode(station);
                                        double temperature = NumberUtils.parseDouble2(currentPage, prevOffset + stationLen + 1, temperatureLen);
                                        Measurement m = new Measurement(station, temperature, hashcode);
                                        threadLocalStates[tid].update(m);
                                        prevOffset = curOffset + 1;
                                        j++;
                                    }
                                    // Explicitly commented out because unload seems to take a lot of time
                                    // segment.unload();
                                }
                                mergeInternal(threadLocalStates[tid]);
                            }));
                        });
                for (Future<?> future : futures) {
                    future.get();
                }
                printDebugMessage("Aggregate took %d ns%n", (System.nanoTime() - time));
            }
        }

        private void mergeInternal(final State state) {
            state.state.forEach((k, v) -> {
                globalMap.compute(k.toString(), (ignored, agg) -> {
                    if (agg == null) {
                        agg = v;
                    }
                    else {
                        agg.merge(v);
                    }
                    return agg;
                });
            });
        }

        public Map<String, MeasurementAggregator> sort() {
            long time = System.nanoTime();
            Map<String, MeasurementAggregator> sortedMap = new TreeMap<>(globalMap);
            printDebugMessage("Tree map construction took %d ns%n", (System.nanoTime() - time));
            return sortedMap;
        }

        private static LineMetadata findNextOccurrenceOfNewLine(final ByteBuffer buffer,
                                                                final int capacity,
                                                                final int offset) {
            int maxLen = capacity - offset;
            byte[] src = new byte[Math.min(MAX_STR_LEN, maxLen)];
            byte[] station = new byte[src.length];
            byte[] temperature = new byte[5];
            buffer.position(offset);
            buffer.get(src);
            int i = 0;
            int j = 0;
            int k = 0;
            boolean isAscii = true;
            boolean afterDelim = false;
            int hashCode = 0;
            for (; i < src.length; i++) {
                byte b = src[i];
                if (b < 0) {
                    isAscii = false;
                }
                if (!afterDelim && b != '\n') {
                    if (b == ';') {
                        afterDelim = true;
                    }
                    else {
                        hashCode = hashCode * 31 + b;
                        station[j++] = b;
                    }
                }
                else if (b != '\n') {
                    temperature[k++] = b;
                }
                else {
                    return new LineMetadata(
                            station, temperature, j, k, offset + i + 1, hashCode, isAscii);
                }
            }
            if (i == 0 & j == 0 && k == 0) {
                hashCode = -1;
            }
            return new LineMetadata(
                    station, temperature, j, k, offset + i, hashCode, isAscii);
        }

        private static SearchResult findNewLinesVectorized(final byte[] page,
                                                           final int pageLen) {
            SearchResult ret = new SearchResult(new int[pageLen / 5], 0);
            VectorSpecies<Byte> species = PREFERRED_SPECIES;
            int loopBound = pageLen - species.length() * 4;
            int i = 0;
            int j = 0;
            while (j < loopBound) {
                Vector<Byte> v1 = ByteVector.fromArray(species, page, j);
                Vector<Byte> v2 = ByteVector.fromArray(species, page, j + species.length());
                Vector<Byte> v3 = ByteVector.fromArray(species, page, j + species.length() * 2);
                Vector<Byte> v4 = ByteVector.fromArray(species, page, j + species.length() * 3);
                long l1 = NEW_LINE_VEC.eq(v1).toLong();
                long l2 = NEW_LINE_VEC.eq(v2).toLong();
                long l3 = NEW_LINE_VEC.eq(v3).toLong();
                long l4 = NEW_LINE_VEC.eq(v4).toLong();
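                // Each eq() mask has one bit per byte lane. The packing below assumes
                // species.length() <= 32 (i.e. at most 256-bit vectors), so two lane masks
                // fit into a single 64-bit long and the newline positions of a whole block
                // can be pulled out with plain bit arithmetic.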
                long r1 = l1 & 0xFFFFFFFFL | (l2 << species.length());
                long r2 = l3 & 0xFFFFFFFFL | (l4 << (species.length()));
                int b1 = Long.bitCount(r1);
                int b2 = Long.bitCount(r2);
                int k = i;
                int it = b1;
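                // Pull out the position of each set bit: numberOfTrailingZeros finds the next '\n'
                // and r1 &= (r1 - 1) clears that bit. The body is unrolled six times, so when b1 is
                // not a multiple of six the surplus iterations read an empty mask and store dummy
                // offsets (j + 64); those slots lie beyond i += b1 and are later overwritten or
                // ignored, since ret.len is ultimately taken from i.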
                while (it > 0) {
                    int idx = Long.numberOfTrailingZeros(r1);
                    ret.offsets[k++] = j + idx;
                    r1 &= (r1 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r1);
                    ret.offsets[k++] = j + idx;
                    r1 &= (r1 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r1);
                    ret.offsets[k++] = j + idx;
                    r1 &= (r1 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r1);
                    ret.offsets[k++] = j + idx;
                    r1 &= (r1 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r1);
                    ret.offsets[k++] = j + idx;
                    r1 &= (r1 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r1);
                    ret.offsets[k++] = j + idx;
                    r1 &= (r1 - 1);
                    it--;
                }
                i += b1;
                j += species.length() * 2;
                k = i;
                it = b2;
                while (it > 0) {
                    int idx = Long.numberOfTrailingZeros(r2);
                    ret.offsets[k++] = j + idx;
                    r2 &= (r2 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r2);
                    ret.offsets[k++] = j + idx;
                    r2 &= (r2 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r2);
                    ret.offsets[k++] = j + idx;
                    r2 &= (r2 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r2);
                    ret.offsets[k++] = j + idx;
                    r2 &= (r2 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r2);
                    ret.offsets[k++] = j + idx;
                    r2 &= (r2 - 1);
                    it--;
                    idx = Long.numberOfTrailingZeros(r2);
                    ret.offsets[k++] = j + idx;
                    r2 &= (r2 - 1);
                    it--;
                }
                i += b2;
                j += species.length() * 2;
            }

            // tail loop
            while (j < pageLen) {
                byte b = page[j];
                if (b == '\n') {
                    ret.offsets[i++] = j;
                }
                j++;
            }
            ret.len = i;
            return ret;
        }

        private static List<Split> breakFileIntoSplits(final RandomAccessFile file,
                                                       final int splitLength,
                                                       final int pageLength,
                                                       final MemorySegment memorySegment,
                                                       final boolean enableChecks)
                throws IOException {
            final List<Split> splits = new ArrayList<>();
            // Try to break the file into multiple splits while ensuring that each split has at least splitLength bytes
            // and ends with '\n' or EOF
            for (long i = 0; i < file.length();) {
                long splitStartOffset = i;
                long splitEndOffset = Math.min(file.length(), splitStartOffset + splitLength); // not inclusive
                if (splitEndOffset == file.length()) { // reached EOF
                    List<Page> pages = breakSplitIntoPages(splitStartOffset, splitEndOffset, pageLength, memorySegment, enableChecks);
                    splits.add(new Split(splitStartOffset, splitEndOffset - splitStartOffset, pages));
                    break;
                }
                // Look past the end offset to find next '\n' or EOF
                long segmentLength = Math.min(MAX_STR_LEN, file.length() - i);
                // Create a new memory segment for reading contents beyond splitEndOffset
                MemorySegment lookahead = memorySegment.asSlice(splitEndOffset, segmentLength);
                ByteBuffer bb = lookahead.asByteBuffer();
                // Find the next offset which has either '\n' or EOF
                LineMetadata lineMetadata = findNextOccurrenceOfNewLine(bb, (int) segmentLength, 0);
                splitEndOffset += lineMetadata.offset;
                if (enableChecks &&
                        memorySegment.asSlice(splitEndOffset - 1, 1).asByteBuffer().get(0) != '\n') {
                    throw new IllegalStateException("Page doesn't end with NL char");
                }
                // Break the split further into multiple pages based on pageLength
                List<Page> pages = breakSplitIntoPages(splitStartOffset, splitEndOffset, pageLength, memorySegment, enableChecks);
                splits.add(new Split(splitStartOffset, splitEndOffset - splitStartOffset, pages));
                i = splitEndOffset;
                lookahead.unload();
            }
            return splits;
        }

        private static List<Page> breakSplitIntoPages(final long splitStartOffset,
                                                      final long splitEndOffset,
                                                      final int pageLength,
                                                      final MemorySegment memorySegment,
                                                      final boolean enableChecks) {
            List<Page> pages = new ArrayList<>();
            for (long i = splitStartOffset; i < splitEndOffset;) {
                long pageStartOffset = i;
                long pageEndOffset = Math.min(splitEndOffset, pageStartOffset + pageLength); // not inclusive
                if (pageEndOffset == splitEndOffset) {
                    pages.add(new Page(pageStartOffset, pageEndOffset - pageStartOffset));
                    break;
                }
                // Look past the end offset to find next '\n' till we reach the end of split
                long lookaheadLength = Math.min(MAX_STR_LEN, splitEndOffset - i);
                MemorySegment lookahead = memorySegment.asSlice(pageEndOffset, lookaheadLength);
                ByteBuffer bb = lookahead.asByteBuffer();
                // Find next offset which has either '\n' or the end of split
                LineMetadata lineMetadata = findNextOccurrenceOfNewLine(bb, (int) lookaheadLength, 0);
                pageEndOffset += lineMetadata.offset;
                if (enableChecks &&
                        memorySegment.asSlice(pageEndOffset - 1, 1).asByteBuffer().get(0) != '\n') {
                    throw new IllegalStateException("Page doesn't end with NL char");
                }
                pages.add(new Page(pageStartOffset, pageEndOffset - pageStartOffset));
                i = pageEndOffset;
                lookahead.unload();
            }
            return pages;
        }
    }

    public static class State {
        private final Map<AggregationKey, MeasurementAggregator> state;

        public State() {
            this.state = new HashMap<>(DEFAULT_HASH_TBL_SIZE);
            // insert a DUMMY key to prime the hashmap for usage
            AggregationKey dummy = new AggregationKey("DUMMY".getBytes(UTF_8), -1);
            this.state.put(dummy, null);
            this.state.remove(dummy);
        }

        public void update(final Measurement m) {
            MeasurementAggregator agg = state.get(m.aggregationKey);
            if (agg == null) {
                state.put(m.aggregationKey, new MeasurementAggregator(m.temperature, m.temperature, m.temperature, 1L));
                return;
            }
            agg.count++;
            agg.min = m.temperature <= agg.min ? m.temperature : agg.min;
            agg.max = m.temperature >= agg.max ? m.temperature : agg.max;
            agg.sum += m.temperature;
        }

        public static class AggregationKey {
            private final byte[] station;
            private final int hashCode;

            public AggregationKey(final byte[] station,
                                  final int hashCode) {
                this.station = station;
                this.hashCode = hashCode;
            }

            @Override
            public String toString() {
                return new String(station, UTF_8);
            }

            @Override
            public int hashCode() {
                return hashCode;
            }

            @Override
            public boolean equals(Object other) {
                if (!(other instanceof AggregationKey)) {
                    return false;
                }
                AggregationKey sk = (AggregationKey) other;
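                // Arrays.mismatch returns -1 only when the two arrays are equal in length and
                // content, so this is a full byte-wise comparison; the explicit length check is
                // just a cheap early exit.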
                return station.length == sk.station.length && Arrays.mismatch(station, sk.station) < 0;
            }
        }
    }

    public static class MeasurementAggregator {
        private double min;
        private double max;
        private double sum;
        private long count;

        public MeasurementAggregator(final double min,
                                     final double max,
                                     final double sum,
                                     final long count) {
            this.min = min;
            this.max = max;
            this.sum = sum;
            this.count = count;
        }

        public String toString() {
            double min1 = round(min);
            double max1 = round(max);
            double mean = round(round(sum) / count);
            return min1 + "/" + mean + "/" + max1;
        }

        private double round(double value) {
            return Math.round(value * 10.0) / 10.0;
        }

        private void merge(final MeasurementAggregator m2) {
            count += m2.count;
            min = Math.min(min, m2.min);
            max = Math.max(max, m2.max);
            sum += m2.sum;
        }
    }

    public static class NumberUtils {
        public static int toDigit(final char c) {
            return DIGIT_LOOKUP[c];
        }

        public static int fastMul10(final int i) {
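            // (i << 1) + (i << 3) == 2*i + 8*i == 10*i; avoids an integer multiply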
            return (i << 1) + (i << 3);
        }

        public static double parseDouble2(final byte[] b,
                                          final int offset,
                                          final int len) {
            try {
                char ch0 = (char) b[offset];
                char ch1 = (char) b[offset + 1];
                char ch2 = (char) b[offset + 2];
                char ch3 = len > 3 ? (char) b[offset + 3] : ' ';
                char ch4 = len > 4 ? (char) b[offset + 4] : ' ';
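                // One fractional digit is guaranteed, so the field length alone identifies the
                // format: len == 3 -> "9.9", len == 4 -> "-9.9" or "99.9", len == 5 -> "-99.9".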
                if (len == 3) {
                    int decimal = toDigit(ch0);
                    double fractional = DOUBLES[toDigit(ch2)];
                    return decimal + fractional;
                }
                else if (len == 4) {
                    // -1.2 or 11.2
                    int decimal = (ch0 == '-' ? toDigit(ch1) : (fastMul10(toDigit(ch0)) + toDigit(ch1)));
                    double fractional = DOUBLES[toDigit(ch3)];
                    if (ch0 == '-') {
                        return Math.negateExact(decimal) - fractional;
                    }
                    else {
                        return decimal + fractional;
                    }
                }
                else {
                    int decimal = fastMul10(toDigit(ch1)) + toDigit(ch2);
                    double fractional = DOUBLES[toDigit(ch4)];
                    return Math.negateExact(decimal) - fractional;
                }
            }
            catch (ArrayIndexOutOfBoundsException e) {
                printDebugMessage("Array index out of bounds for string: %s%n", new String(b, 0, len));
                throw new RuntimeException(e);
            }
            catch (StringIndexOutOfBoundsException e) {
                printDebugMessage("String index out of bounds for string: %s%n", new String(b, 0, len));
                throw new RuntimeException(e);
            }
        }
    }

    // record classes
    record Measurement(byte[] station,
                       double temperature,
                       int hash,
                       State.AggregationKey aggregationKey) {

        public Measurement(byte[] station,
                           double temperature,
                           int hashCode) {
            this(station,
                    temperature,
                    hashCode,
                    new State.AggregationKey(station, hashCode));
        }

    }

    record LineMetadata(byte[] station,
                        byte[] temperature,
                        int stationLen,
                        int temperatureLen,
                        int offset,
                        int precomputedHashCode, boolean isAscii) {
    }

    record Split(long offset, long length, List<Page> pages) {
    }

    record Page(long offset, long length) {
    }

    public static class SearchResult {
        private int[] offsets;
        private int len;

        public SearchResult(final int[] offsets,
                            final int len) {
            this.offsets = offsets;
            this.len = len;
        }
    }

    private static void printDebugMessage(final String message,
                                          final Object... args) {
        if (DEBUG) {
            System.err.printf(message, args);
        }
    }
}
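For reference, the map printed by main() is a TreeMap keyed by station name, and each MeasurementAggregator renders as min/mean/max rounded to one decimal, so the program's output has the shape (station names and values here are only illustrative):

    {Abha=-23.0/18.0/59.2, Abidjan=-16.2/26.0/67.3, ...}

Below is a minimal, self-contained sketch (not part of the submission) of the newline-scanning idea that findNewLinesVectorized builds on: compare a block of bytes against '\n' with the Vector API, turn the comparison mask into a long, and walk its set bits. The class and file names are illustrative.

    // Standalone sketch of the vectorized newline scan; run with:
    //   java --add-modules jdk.incubator.vector NewlineScanSketch.java
    import jdk.incubator.vector.ByteVector;
    import jdk.incubator.vector.VectorSpecies;

    public class NewlineScanSketch {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_PREFERRED;

        public static void main(String[] args) {
            byte[] data = "Hamburg;12.0\nBulawayo;8.9\nPalembang;38.8\n".getBytes();
            int i = 0;
            // Vectorized part: one comparison covers SPECIES.length() bytes at a time
            for (; i + SPECIES.length() <= data.length; i += SPECIES.length()) {
                long mask = ByteVector.fromArray(SPECIES, data, i)
                        .eq((byte) '\n')
                        .toLong();
                while (mask != 0) {
                    int idx = Long.numberOfTrailingZeros(mask); // position of the next '\n'
                    System.out.println("newline at " + (i + idx));
                    mask &= (mask - 1); // clear the lowest set bit
                }
            }
            // Scalar tail for the remaining bytes
            for (; i < data.length; i++) {
                if (data[i] == '\n') {
                    System.out.println("newline at " + i);
                }
            }
        }
    }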